import tensorflow
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Conv2D, MaxPooling2D, Flatten, AveragePooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.python.keras.utils.data_utils import Sequence
from imblearn.over_sampling import RandomOverSampler
from imblearn.keras import balanced_batch_generator
import sklearn
from sklearn.tree import export_graphviz
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.utils import shuffle
from sklearn import svm
import seaborn
import gym
from imblearn.under_sampling import RandomUnderSampler
import math
from IPython.display import SVG
import time
import csv
import os
import cv2
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import scipy as sp
import PIL
import warnings
import time
warnings.filterwarnings('ignore')
# Registry of trained models and their training histories, keyed by name.
models = {}

# Results table: one row per reported metric, one column added per model.
RESULT_ROWS = ['Model Type',
               'Sample size',
               'Epochs',
               'Validation set Accuracy',
               'Test set Accuracy',
               'Validation set F1',
               'Test set F1',
               'Training Time']
CNN_results = pd.DataFrame(index=RESULT_ROWS)

# Directory holding the training images.
train_data_dir = 'trainingdata/'
# Fraction of the full dataset sampled for the baseline run.
sample_rate = .1
To establish an initial baseline, I loaded the data in its raw form. I have not normalised or resized the images.
num_classes = 4
input_shape = (200, 300, 1)
# Initialise arrays for data storage.
# FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24; the
# builtin float is the exact equivalent (float64).
X_data = np.ndarray((0, input_shape[0], input_shape[1], input_shape[2]), dtype=float)
y_data = np.ndarray(0, dtype=float)
# Generate filenames from the data folder (skip hidden files such as .DS_Store)
image_filenames = [train_data_dir + i for i in os.listdir(train_data_dir) if not i.startswith('.')]
# Sample the data down to sample_rate of the full set
image_filenames = random.sample(image_filenames, int(len(image_filenames) * sample_rate))
# Create a data array for image data
count = len(image_filenames)
X_data_part = np.ndarray((count, input_shape[0], input_shape[1], input_shape[2]), dtype=float)
# Iterate through the filenames and load each image as greyscale.
# Labels are collected in a plain list and converted once at the end:
# np.append inside the loop reallocates the whole array every iteration (O(n^2)).
labels = []
for i, image_file in enumerate(image_filenames):
    image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
    image = image.reshape(input_shape)
    X_data_part[i] = image
    # The class label is the digit 6 characters from the end of the filename.
    labels.append(int(image_file[-6]))
print('Processed {} images'.format(count))
y_data = np.append(y_data, labels)
# Append the part to the overall data array
X_data = np.append(X_data, X_data_part, axis=0)
print("Data shape: {}".format(X_data.shape))
print(X_data_part[i - 1].shape)
Processed 4125 images Data shape: (4125, 200, 300, 1) (200, 300, 1)
# Visualise the class distribution of the labels (reveals the imbalance).
seaborn.countplot(y_data)
<AxesSubplot:ylabel='count'>
As seen above, the data is extremely unbalanced.
# Perform a 70/30 split into train+validation and test sets
# (random_state pinned for reproducibility across runs).
X_train_plus_valid, X_test, y_train_plus_valid, y_test = train_test_split(X_data, y_data, random_state=0, test_size = 0.30, train_size = 0.7)
# Split the 70% again: 80% train, 20% validation.
X_train, X_valid, y_train, y_valid = train_test_split(X_train_plus_valid, y_train_plus_valid, random_state=0, test_size = 0.20, train_size = 0.8)
# Sanity-check a single sample's shape.
X_test[0].shape
(200, 300, 1)
# One-hot encode the integer class labels for the 4-way softmax output layer.
y_train_wide = to_categorical(y_train, num_classes)
y_test_wide = to_categorical(y_test, num_classes)
y_valid_wide = to_categorical(y_valid, num_classes)
I am starting with a super basic model with one convolutional layer without padding, a maxPooling layer and two dense layers.
# Baseline CNN: one conv layer, average pooling, then two dense layers.
model = Sequential()
model.add(Conv2D(filters=6, kernel_size=(3, 3),
                 activation='relu', input_shape=input_shape))
model.add(AveragePooling2D(pool_size=(3, 3)))
model.add(Flatten())
model.add(Dense(units=150, activation='relu'))
model.add(Dense(units=4, activation='softmax'))
# BUG FIX: this is a single-label 4-class problem with a softmax head, so
# the loss must be categorical_crossentropy.  binary_crossentropy treats
# each of the 4 outputs as an independent binary problem and makes the
# reported 'accuracy' a misleading element-wise binary accuracy.
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',  # explicit; rmsprop is the Keras default
              metrics=['accuracy'])
model.summary()
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 198, 298, 6) 60 _________________________________________________________________ average_pooling2d (AveragePo (None, 66, 99, 6) 0 _________________________________________________________________ flatten (Flatten) (None, 39204) 0 _________________________________________________________________ dense (Dense) (None, 150) 5880750 _________________________________________________________________ dense_1 (Dense) (None, 4) 604 ================================================================= Total params: 5,881,414 Trainable params: 5,881,414 Non-trainable params: 0 _________________________________________________________________
batch_size = 128
epochs = 20
# Callback: persist only the model with the best validation accuracy so far.
best_weights_filepath = './best_weights_model_1.hdf5'
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_accuracy",
                      save_best_only=True, save_weights_only=False)
start_time = time.time()
# Train on the raw images; Keras holds out a further 20% of X_train as the
# validation set reported as val_loss / val_accuracy.
history = model.fit(X_train, y_train_wide,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose = 1,
                    validation_split = 0.2,
                    shuffle=True,
                    callbacks=[mcp])
end_time = time.time()
# Restore the best checkpoint before evaluating.
model.load_weights(best_weights_filepath)
Epoch 1/20 15/15 [==============================] - 6s 402ms/step - loss: 18.9737 - accuracy: 0.5284 - val_loss: 18.3162 - val_accuracy: 0.6147 Epoch 2/20 15/15 [==============================] - 6s 400ms/step - loss: 9.1102 - accuracy: 0.6286 - val_loss: 4.5134 - val_accuracy: 0.6688 Epoch 3/20 15/15 [==============================] - 6s 399ms/step - loss: 4.2498 - accuracy: 0.6508 - val_loss: 3.2821 - val_accuracy: 0.5628 Epoch 4/20 15/15 [==============================] - 6s 399ms/step - loss: 1.2731 - accuracy: 0.7233 - val_loss: 1.4198 - val_accuracy: 0.6277 Epoch 5/20 15/15 [==============================] - 6s 399ms/step - loss: 0.5737 - accuracy: 0.7537 - val_loss: 0.9289 - val_accuracy: 0.5649 Epoch 6/20 15/15 [==============================] - 6s 409ms/step - loss: 0.3310 - accuracy: 0.7764 - val_loss: 0.7757 - val_accuracy: 0.5952 Epoch 7/20 15/15 [==============================] - 6s 405ms/step - loss: 0.2585 - accuracy: 0.8002 - val_loss: 0.6032 - val_accuracy: 0.6385 Epoch 8/20 15/15 [==============================] - 6s 399ms/step - loss: 0.2156 - accuracy: 0.8278 - val_loss: 0.5634 - val_accuracy: 0.6190 Epoch 9/20 15/15 [==============================] - 6s 398ms/step - loss: 0.1989 - accuracy: 0.8322 - val_loss: 0.5347 - val_accuracy: 0.6753 Epoch 10/20 15/15 [==============================] - 6s 403ms/step - loss: 0.1669 - accuracy: 0.8603 - val_loss: 0.6069 - val_accuracy: 0.5952 Epoch 11/20 15/15 [==============================] - 6s 401ms/step - loss: 0.1566 - accuracy: 0.8771 - val_loss: 0.6014 - val_accuracy: 0.6212 Epoch 12/20 15/15 [==============================] - 6s 406ms/step - loss: 0.1509 - accuracy: 0.8690 - val_loss: 0.6006 - val_accuracy: 0.6537 Epoch 13/20 15/15 [==============================] - 6s 398ms/step - loss: 0.1210 - accuracy: 0.8966 - val_loss: 0.6493 - val_accuracy: 0.6602 Epoch 14/20 15/15 [==============================] - 6s 401ms/step - loss: 0.1190 - accuracy: 0.9015 - val_loss: 0.6830 - val_accuracy: 0.6190 
Epoch 15/20 15/15 [==============================] - 6s 430ms/step - loss: 0.1405 - accuracy: 0.9069 - val_loss: 0.7178 - val_accuracy: 0.5974 Epoch 16/20 15/15 [==============================] - 6s 402ms/step - loss: 0.0897 - accuracy: 0.9231 - val_loss: 0.7327 - val_accuracy: 0.6494 Epoch 17/20 15/15 [==============================] - 6s 401ms/step - loss: 0.0856 - accuracy: 0.9237 - val_loss: 0.7550 - val_accuracy: 0.6364 Epoch 18/20 15/15 [==============================] - 6s 396ms/step - loss: 0.0830 - accuracy: 0.9356 - val_loss: 0.8055 - val_accuracy: 0.6255 Epoch 19/20 15/15 [==============================] - 6s 406ms/step - loss: 0.0710 - accuracy: 0.9399 - val_loss: 0.8233 - val_accuracy: 0.6104 Epoch 20/20 15/15 [==============================] - 6s 401ms/step - loss: 0.0684 - accuracy: 0.9437 - val_loss: 0.9411 - val_accuracy: 0.5844
# saving model results
# Keep both the trained model and its History object for later plotting.
models['model_1'] = model
models['model_1_history'] = history
Below I am defining functions to plot the loss and accuracy of models, this will avoid a lot of repeated code when evaluating other models later.
def plot_loss(key, name):
    """Plot training vs. validation loss curves for a stored history.

    Parameters
    ----------
    key : str
        Key into the module-level ``models`` dict holding a Keras History.
    name : str
        Human-readable model name used in the plot title.
    """
    loss = models[key].history['loss']
    val_loss = models[key].history['val_loss']
    plt.figure(figsize=(15, 8))
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.plot(loss, 'blue', label='Training Loss')
    plt.plot(val_loss, 'green', label='Validation Loss')
    # Tick every second epoch.  FIX: derive the epoch count from the
    # history itself rather than the global `epochs`, so the plot stays
    # correct for models trained with a different epoch count.  The
    # original also called plt.xticks(rotation=90) first, which this
    # later plt.xticks(ticks) call silently discarded.
    plt.xticks(range(0, len(loss))[0::2])
    plt.legend()
    plt.title("Loss of train and validation set - " + name)
    plt.show()
def plot_accuracy(key, name):
    """Plot training vs. validation accuracy curves for a stored history.

    Parameters
    ----------
    key : str
        Key into the module-level ``models`` dict holding a Keras History.
    name : str
        Human-readable model name used in the plot title.
    """
    # FIX: locals renamed from loss/val_loss -- these hold accuracies.
    acc = models[key].history['accuracy']
    val_acc = models[key].history['val_accuracy']
    plt.figure(figsize=(15, 8))
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.plot(acc, 'blue', label='Training Accuracy')
    plt.plot(val_acc, 'green', label='Validation Accuracy')
    # Tick every second epoch, derived from the history length instead of
    # the global `epochs` (the earlier xticks(rotation=45) call was a
    # no-op because this call replaced the labels it rotated).
    plt.xticks(range(0, len(acc))[0::2])
    plt.legend()
    plt.title("Accuracy of train and validation set - " + name)
    plt.show()
# Visualise learning curves for the baseline model.
plot_loss('model_1_history', 'Model 1')
plot_accuracy('model_1_history', 'Model 1')
The model has visibly overfit from around the third epoch. Training accuracy continues to grow but validation accuracy has dropped off.
# FIX: Sequential.predict_classes was deprecated in TF 2.5 and removed in
# 2.6.  np.argmax over the softmax probabilities is the exact equivalent.
eval_v = metrics.classification_report(y_valid, np.argmax(model.predict(X_valid), axis=1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(model.predict(X_test), axis=1), output_dict=True)
def record_results(model_name, description):
    """Append one column of results for *model_name* to CNN_results.

    Relies on module-level state: sample_rate, epochs, the
    classification-report dicts eval_v / eval_t, and start_time / end_time.
    """
    column = [
        description,
        sample_rate,
        epochs,
        eval_v['accuracy'],
        eval_t['accuracy'],
        eval_v['macro avg']['f1-score'],
        eval_t['macro avg']['f1-score'],
        end_time - start_time,
    ]
    CNN_results[model_name] = column
# Record baseline metrics and display the running results table.
record_results('Model 1', 'Unmodified input data and 1 conv layer')
CNN_results
| Model 1 | |
|---|---|
| Model Type | Unmodified input data and 1 conv layer |
| Sample size | 0.1 |
| Epochs | 20 |
| Validation set Accuracy | 0.624567 |
| Test set Accuracy | 0.662359 |
| Validation set F1 | 0.391548 |
| Test set F1 | 0.45446 |
| Training Time | 120.844 |
We can see that the F1 score is significantly lower than the accuracy score, reflecting bias in the model due to the unbalanced data.
Next I will reload the data, this time resizing and normalising it. I've decided to try the data out on a LeNet architecture, so I will be resizing the images to 32 * 32. Thanks to the decreased size of the images, training will be quicker, so I will be using the full training data set available.
# Desired dimensions of our images.
img_width, img_height = 32, 32
# Set input shape
input_shape = (img_width, img_height, 1)
# Initialise arrays for data storage.
# FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24; the
# builtin float is the exact equivalent (float64).
X_data = np.ndarray((0, input_shape[0], input_shape[1], input_shape[2]), dtype=float)
y_data = np.ndarray(0, dtype=float)
# Generate filenames from the data folder (full dataset, no sampling)
image_filenames = [train_data_dir + i for i in os.listdir(train_data_dir) if not i.startswith('.')]
# Create a data array for image data
count = len(image_filenames)
X_data_part = np.ndarray((count, input_shape[0], input_shape[1], input_shape[2]), dtype=float)
# Iterate through the filenames and for each one load the image, resize
# and normalise.  Labels are collected in a list and converted once:
# np.append inside the loop reallocates the whole array each time (O(n^2)).
labels = []
for i, image_file in enumerate(image_filenames):
    image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
    # NOTE(review): cv2.resize takes dsize as (width, height); both are 32
    # here so the order does not matter, but it would for non-square sizes.
    image = cv2.resize(image, (img_height, img_width), interpolation=cv2.INTER_CUBIC)
    image = image.reshape(input_shape)
    # Normalise pixel values to [0, 1]
    X_data_part[i] = image / 255
    # The class label is the digit 6 characters from the end of the filename.
    labels.append(int(image_file[-6]))
print('Processed {} images'.format(count))
y_data = np.append(y_data, labels)
# Append the part to the overall data array
X_data = np.append(X_data, X_data_part, axis=0)
print("Data shape: {}".format(X_data.shape))
print(X_data_part[i - 1].shape)
Processed 41251 images Data shape: (41251, 32, 32, 1) (32, 32, 1)
# Perform a 70/30 split into train+validation and test sets
# (random_state pinned for reproducibility across runs).
X_train_plus_valid, X_test, y_train_plus_valid, y_test = train_test_split(X_data, y_data, random_state=0, test_size = 0.30, train_size = 0.7)
# Split the 70% again: 80% train, 20% validation.
X_train, X_valid, y_train, y_valid = train_test_split(X_train_plus_valid, y_train_plus_valid, random_state=0, test_size = 0.20, train_size = 0.8)
# One-hot encode the integer class labels for the softmax output layer.
y_train_wide = to_categorical(y_train, num_classes)
y_test_wide = to_categorical(y_test, num_classes)
y_valid_wide = to_categorical(y_valid, num_classes)
Implementing LeNet model below
# LeNet-style CNN for the 32x32 greyscale images.
model = Sequential()
model.add(Conv2D(filters=6, kernel_size=(3, 3),
                 activation='relu', input_shape=input_shape))
model.add(MaxPooling2D())
# input_shape is only meaningful on the first layer, so the redundant
# (ignored) input_shape argument is dropped from this conv layer.
model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D())
model.add(Flatten())
model.add(Dense(units=120, activation='relu'))
model.add(Dense(units=84, activation='relu'))
model.add(Dense(units=4, activation='softmax'))
# BUG FIX: single-label 4-class softmax requires categorical_crossentropy;
# binary_crossentropy treats the 4 outputs independently and inflates the
# reported accuracy metric.
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',  # explicit; rmsprop is the Keras default
              metrics=['accuracy'])
model.summary()
Model: "sequential_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d_1 (Conv2D) (None, 30, 30, 6) 60 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 15, 15, 6) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 13, 13, 16) 880 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 6, 6, 16) 0 _________________________________________________________________ flatten_1 (Flatten) (None, 576) 0 _________________________________________________________________ dense_2 (Dense) (None, 120) 69240 _________________________________________________________________ dense_3 (Dense) (None, 84) 10164 _________________________________________________________________ dense_4 (Dense) (None, 4) 340 ================================================================= Total params: 80,684 Trainable params: 80,684 Non-trainable params: 0 _________________________________________________________________
# Checkpoint the best-validation-accuracy weights for model 2.
best_weights_filepath = './best_weights_model_2.hdf5'
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_accuracy",
                      save_best_only=True, save_weights_only=False)
start_time = time.time()
# Train on the resized/normalised data; 20% of X_train is held out by
# Keras as the per-epoch validation set.
history = model.fit(X_train, y_train_wide,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose = 1,
                    validation_split = 0.2,
                    shuffle=True,
                    callbacks=[mcp])
end_time = time.time()
# Restore the best checkpoint before evaluating.
model.load_weights(best_weights_filepath)
Epoch 1/20 145/145 [==============================] - 3s 15ms/step - loss: 0.4304 - accuracy: 0.5236 - val_loss: 0.4004 - val_accuracy: 0.5749 Epoch 2/20 145/145 [==============================] - 2s 14ms/step - loss: 0.3740 - accuracy: 0.6426 - val_loss: 0.3804 - val_accuracy: 0.6236 Epoch 3/20 145/145 [==============================] - 2s 14ms/step - loss: 0.3661 - accuracy: 0.6632 - val_loss: 0.3590 - val_accuracy: 0.6675 Epoch 4/20 145/145 [==============================] - 2s 15ms/step - loss: 0.3566 - accuracy: 0.6706 - val_loss: 0.3557 - val_accuracy: 0.6593 Epoch 5/20 145/145 [==============================] - 2s 14ms/step - loss: 0.3526 - accuracy: 0.6721 - val_loss: 0.3649 - val_accuracy: 0.6636 Epoch 6/20 145/145 [==============================] - 2s 15ms/step - loss: 0.3488 - accuracy: 0.6763 - val_loss: 0.3526 - val_accuracy: 0.6677 Epoch 7/20 145/145 [==============================] - 2s 15ms/step - loss: 0.3470 - accuracy: 0.6749 - val_loss: 0.3478 - val_accuracy: 0.6706 Epoch 8/20 145/145 [==============================] - 2s 15ms/step - loss: 0.3425 - accuracy: 0.6810 - val_loss: 0.3474 - val_accuracy: 0.6725 Epoch 9/20 145/145 [==============================] - 2s 15ms/step - loss: 0.3397 - accuracy: 0.6827 - val_loss: 0.3475 - val_accuracy: 0.6703 Epoch 10/20 145/145 [==============================] - 2s 15ms/step - loss: 0.3384 - accuracy: 0.6789 - val_loss: 0.3612 - val_accuracy: 0.6662 Epoch 11/20 145/145 [==============================] - 2s 16ms/step - loss: 0.3379 - accuracy: 0.6833 - val_loss: 0.3566 - val_accuracy: 0.6602 Epoch 12/20 145/145 [==============================] - 2s 16ms/step - loss: 0.3312 - accuracy: 0.6923 - val_loss: 0.3438 - val_accuracy: 0.6775 Epoch 13/20 145/145 [==============================] - 2s 16ms/step - loss: 0.3284 - accuracy: 0.6883 - val_loss: 0.3400 - val_accuracy: 0.6716 Epoch 14/20 145/145 [==============================] - 2s 16ms/step - loss: 0.3268 - accuracy: 0.6892 - val_loss: 0.3377 - val_accuracy: 
0.6764 Epoch 15/20 145/145 [==============================] - 2s 16ms/step - loss: 0.3222 - accuracy: 0.6949 - val_loss: 0.3669 - val_accuracy: 0.6621 Epoch 16/20 145/145 [==============================] - 2s 17ms/step - loss: 0.3231 - accuracy: 0.6941 - val_loss: 0.3393 - val_accuracy: 0.6812 Epoch 17/20 145/145 [==============================] - 2s 16ms/step - loss: 0.3235 - accuracy: 0.6904 - val_loss: 0.3439 - val_accuracy: 0.6801 Epoch 18/20 145/145 [==============================] - 2s 16ms/step - loss: 0.3187 - accuracy: 0.7002 - val_loss: 0.3349 - val_accuracy: 0.6781 Epoch 19/20 145/145 [==============================] - 2s 16ms/step - loss: 0.3226 - accuracy: 0.6900 - val_loss: 0.3430 - val_accuracy: 0.6781 Epoch 20/20 145/145 [==============================] - 2s 16ms/step - loss: 0.3147 - accuracy: 0.6951 - val_loss: 0.3383 - val_accuracy: 0.6768
# Store model 2 and plot its learning curves.
models['model_2'] = model
models['model_2_history'] = history
plot_loss('model_2_history', 'Model 2')
plot_accuracy('model_2_history', 'Model 2')
# FIX: predict_classes was removed in TF 2.6; argmax over the softmax
# outputs is the exact equivalent.
eval_v = metrics.classification_report(y_valid, np.argmax(model.predict(X_valid), axis=1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(model.predict(X_test), axis=1), output_dict=True)
record_results('Model 2', '32*32 resized and normalised images on LeNet')
CNN_results
| Model 1 | Model 2 | |
|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet |
| Sample size | 0.1 | 1 |
| Epochs | 20 | 20 |
| Validation set Accuracy | 0.624567 | 0.695065 |
| Test set Accuracy | 0.662359 | 0.68948 |
| Validation set F1 | 0.391548 | 0.411632 |
| Test set F1 | 0.45446 | 0.398209 |
| Training Time | 120.844 | 44.9329 |
Training time has significantly decreased, despite using the entire dataset. Accuracy has increased on both the validation and the test set, and the F1 score has improved on the validation set. The F1 score has actually dropped on the Test set though.
Class 1 and class 3 are being under-predicted due to the imbalance in the data. This can be seen by comparing the accuracy score to the lower F1 score. Data augmentation is a technique used to create new training data. I thought that perhaps by adding extra, augmented data, and manually weighting the classes in favor of the minority classes, we may alleviate some bias.
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augmentation settings: modest rotations and shifts plus horizontal flips.
# The feature-wise centering / std-normalisation options stay disabled.
augmentation = dict(
    featurewise_center=False,
    featurewise_std_normalization=False,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
)
datagen = ImageDataGenerator(**augmentation)
# fit parameters from data (only consumed by the feature-wise options,
# which are disabled above; kept for parity with the original pipeline)
datagen.fit(X_train)
from sklearn.utils import class_weight
# FIX: compute_class_weight accepts keyword arguments only in
# scikit-learn >= 1.0 (positional use was deprecated in 0.24), so the
# arguments are passed by name.  'balanced' weights each class by
# n_samples / (n_classes * count(class)), boosting the minority classes.
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train),
    y=y_train)
# Map each class label to its weight for Keras's class_weight argument.
c_weights = dict(zip(np.unique(y_train), class_weights))
c_weights
{0.0: 0.5364607524384579,
1.0: 8.020833333333334,
2.0: 0.5333394902105651,
3.0: 7.337992376111817}
best_weights_filepath = './best_weights_model_3.hdf5'
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_accuracy",
                      save_best_only=True, save_weights_only=False)
start_time = time.time()
# FIX: Model.fit has accepted generators since TF 2.1; fit_generator is
# deprecated and was removed in later releases.  steps_per_epoch must be
# an integer, so the fractional batch count is rounded up with math.ceil
# instead of passing a float.  shuffle is dropped: Keras ignores it for
# generator input.
# NOTE(review): this continues training the already-trained model 2
# instance rather than a freshly initialised network -- confirm intended.
history = model.fit(datagen.flow(X_train, y_train_wide, batch_size=batch_size),
                    steps_per_epoch=math.ceil(len(X_train) / batch_size),
                    validation_data=(X_valid, y_valid_wide),
                    class_weight=c_weights,
                    epochs=epochs,
                    verbose=1,
                    callbacks=[mcp])
end_time = time.time()
# Restore the best checkpoint before evaluating.
model.load_weights(best_weights_filepath)
Epoch 1/20 180/180 [==============================] - 7s 34ms/step - loss: 0.5760 - accuracy: 0.3793 - val_loss: 0.5150 - val_accuracy: 0.5803 Epoch 2/20 180/180 [==============================] - 6s 34ms/step - loss: 0.5536 - accuracy: 0.4843 - val_loss: 0.4946 - val_accuracy: 0.6168 Epoch 3/20 180/180 [==============================] - 6s 33ms/step - loss: 0.5491 - accuracy: 0.5029 - val_loss: 0.4557 - val_accuracy: 0.6296 Epoch 4/20 180/180 [==============================] - 6s 32ms/step - loss: 0.5441 - accuracy: 0.5302 - val_loss: 0.5073 - val_accuracy: 0.6054 Epoch 5/20 180/180 [==============================] - 6s 32ms/step - loss: 0.5425 - accuracy: 0.5188 - val_loss: 0.4718 - val_accuracy: 0.6140 Epoch 6/20 180/180 [==============================] - 6s 32ms/step - loss: 0.5403 - accuracy: 0.5229 - val_loss: 0.4290 - val_accuracy: 0.6100 Epoch 7/20 180/180 [==============================] - 6s 33ms/step - loss: 0.5393 - accuracy: 0.5342 - val_loss: 0.4532 - val_accuracy: 0.6125 Epoch 8/20 180/180 [==============================] - 6s 33ms/step - loss: 0.5383 - accuracy: 0.5376 - val_loss: 0.4238 - val_accuracy: 0.6116 Epoch 9/20 180/180 [==============================] - 6s 33ms/step - loss: 0.5348 - accuracy: 0.5469 - val_loss: 0.4505 - val_accuracy: 0.6324 Epoch 10/20 180/180 [==============================] - 6s 34ms/step - loss: 0.5337 - accuracy: 0.5472 - val_loss: 0.4270 - val_accuracy: 0.6229 Epoch 11/20 180/180 [==============================] - 6s 34ms/step - loss: 0.5340 - accuracy: 0.5574 - val_loss: 0.4267 - val_accuracy: 0.6338 Epoch 12/20 180/180 [==============================] - 6s 33ms/step - loss: 0.5344 - accuracy: 0.5535 - val_loss: 0.4503 - val_accuracy: 0.5977 Epoch 13/20 180/180 [==============================] - 6s 33ms/step - loss: 0.5336 - accuracy: 0.5507 - val_loss: 0.4197 - val_accuracy: 0.6178 Epoch 14/20 180/180 [==============================] - 6s 33ms/step - loss: 0.5313 - accuracy: 0.5635 - val_loss: 0.4706 - val_accuracy: 
0.6388 Epoch 15/20 180/180 [==============================] - 6s 34ms/step - loss: 0.5310 - accuracy: 0.5672 - val_loss: 0.4948 - val_accuracy: 0.5415 Epoch 16/20 180/180 [==============================] - 6s 34ms/step - loss: 0.5297 - accuracy: 0.5548 - val_loss: 0.4355 - val_accuracy: 0.6407 Epoch 17/20 180/180 [==============================] - 6s 34ms/step - loss: 0.5297 - accuracy: 0.5617 - val_loss: 0.4465 - val_accuracy: 0.6369 Epoch 18/20 180/180 [==============================] - 6s 34ms/step - loss: 0.5302 - accuracy: 0.5639 - val_loss: 0.4494 - val_accuracy: 0.6119 Epoch 19/20 180/180 [==============================] - 6s 34ms/step - loss: 0.5282 - accuracy: 0.5681 - val_loss: 0.4411 - val_accuracy: 0.6306 Epoch 20/20 180/180 [==============================] - 6s 34ms/step - loss: 0.5283 - accuracy: 0.5661 - val_loss: 0.4557 - val_accuracy: 0.5912
# Store model 3 and plot its learning curves.
models['model_3'] = model
models['model_3_history'] = history
plot_loss('model_3_history', 'Model 3')
plot_accuracy('model_3_history', 'Model 3')
Interestingly the validation set is performing quite a bit better than the training set.
# FIX: predict_classes was removed in TF 2.6; argmax over the softmax
# outputs is the exact equivalent.
eval_v = metrics.classification_report(y_valid, np.argmax(model.predict(X_valid), axis=1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(model.predict(X_test), axis=1), output_dict=True)
record_results('Model 3', 'Previous model architecture with data augmentation and class weights')
CNN_results
| Model 1 | Model 2 | Model 3 | |
|---|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet | Previous model architecture with data augmenta... |
| Sample size | 0.1 | 1 | 1 |
| Epochs | 20 | 20 | 20 |
| Validation set Accuracy | 0.624567 | 0.695065 | 0.640693 |
| Test set Accuracy | 0.662359 | 0.68948 | 0.629121 |
| Validation set F1 | 0.391548 | 0.411632 | 0.388973 |
| Test set F1 | 0.45446 | 0.398209 | 0.372987 |
| Training Time | 120.844 | 44.9329 | 121.472 |
Unfortunately this model did not perform well when presented with un-augmented data. Although accuracy did not drop too much, the f1 Scores on both the validation and the test set dropped as compared with the previous model.
The weighting of classes to address bias has not had the desired effect; let's see if data augmentation without class-weighting improves performance.
best_weights_filepath = './best_weights_model_4.hdf5'
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_accuracy",
                      save_best_only=True, save_weights_only=False)
start_time = time.time()
# FIX: fit_generator is deprecated (Model.fit accepts generators since
# TF 2.1); steps_per_epoch rounded up to an integer; the hard-coded 128
# is replaced with the batch_size variable used everywhere else; shuffle
# dropped because Keras ignores it for generator input.
history = model.fit(datagen.flow(X_train, y_train_wide, batch_size=batch_size),
                    steps_per_epoch=math.ceil(len(X_train) / batch_size),
                    validation_data=(X_valid, y_valid_wide),
                    epochs=epochs,
                    verbose=1,
                    callbacks=[mcp])
end_time = time.time()
# Restore the best checkpoint before evaluating.
model.load_weights(best_weights_filepath)
Epoch 1/20 180/180 [==============================] - 6s 34ms/step - loss: 0.3823 - accuracy: 0.6359 - val_loss: 0.3800 - val_accuracy: 0.6594 Epoch 2/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3740 - accuracy: 0.6448 - val_loss: 0.3683 - val_accuracy: 0.6672 Epoch 3/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3732 - accuracy: 0.6465 - val_loss: 0.3685 - val_accuracy: 0.6606 Epoch 4/20 180/180 [==============================] - 6s 34ms/step - loss: 0.3704 - accuracy: 0.6505 - val_loss: 0.3597 - val_accuracy: 0.6696 Epoch 5/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3713 - accuracy: 0.6492 - val_loss: 0.3833 - val_accuracy: 0.6573 Epoch 6/20 180/180 [==============================] - 6s 34ms/step - loss: 0.3698 - accuracy: 0.6514 - val_loss: 0.3788 - val_accuracy: 0.6717 Epoch 7/20 180/180 [==============================] - 6s 34ms/step - loss: 0.3695 - accuracy: 0.6513 - val_loss: 0.3742 - val_accuracy: 0.6604 Epoch 8/20 180/180 [==============================] - 6s 32ms/step - loss: 0.3691 - accuracy: 0.6520 - val_loss: 0.3627 - val_accuracy: 0.6757 Epoch 9/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3685 - accuracy: 0.6528 - val_loss: 0.3563 - val_accuracy: 0.6769 Epoch 10/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3681 - accuracy: 0.6522 - val_loss: 0.3591 - val_accuracy: 0.6637 Epoch 11/20 180/180 [==============================] - 6s 32ms/step - loss: 0.3677 - accuracy: 0.6549 - val_loss: 0.3584 - val_accuracy: 0.6805 Epoch 12/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3668 - accuracy: 0.6558 - val_loss: 0.3621 - val_accuracy: 0.6746 Epoch 13/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3662 - accuracy: 0.6568 - val_loss: 0.3631 - val_accuracy: 0.6710 Epoch 14/20 180/180 [==============================] - 6s 32ms/step - loss: 0.3666 - accuracy: 0.6560 - val_loss: 0.3657 - val_accuracy: 
0.6701 Epoch 15/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3665 - accuracy: 0.6555 - val_loss: 0.3827 - val_accuracy: 0.6648 Epoch 16/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3656 - accuracy: 0.6563 - val_loss: 0.3615 - val_accuracy: 0.6753 Epoch 17/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3658 - accuracy: 0.6581 - val_loss: 0.3601 - val_accuracy: 0.6752 Epoch 18/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3662 - accuracy: 0.6573 - val_loss: 0.3725 - val_accuracy: 0.6719 Epoch 19/20 180/180 [==============================] - 6s 33ms/step - loss: 0.3645 - accuracy: 0.6583 - val_loss: 0.4057 - val_accuracy: 0.6590 Epoch 20/20 180/180 [==============================] - 6s 35ms/step - loss: 0.3648 - accuracy: 0.6587 - val_loss: 0.3644 - val_accuracy: 0.6715
# Store model 4 and plot its learning curves.
models['model_4'] = model
models['model_4_history'] = history
plot_loss('model_4_history', 'Model 4')
plot_accuracy('model_4_history', 'Model 4')
Again, the validation set is performing well on the augmented data.
# FIX: predict_classes was removed in TF 2.6; argmax over the softmax
# outputs is the exact equivalent.
eval_v = metrics.classification_report(y_valid, np.argmax(model.predict(X_valid), axis=1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(model.predict(X_test), axis=1), output_dict=True)
record_results('Model 4', 'Data augmentation without manually set class weights')
CNN_results
| Model 1 | Model 2 | Model 3 | Model 4 | |
|---|---|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet | Previous model architecture with data augmenta... | Data augmentation without manually set class w... |
| Sample size | 0.1 | 1 | 1 | 1 |
| Epochs | 20 | 20 | 20 | 20 |
| Validation set Accuracy | 0.624567 | 0.695065 | 0.640693 | 0.680519 |
| Test set Accuracy | 0.662359 | 0.68948 | 0.629121 | 0.672754 |
| Validation set F1 | 0.391548 | 0.411632 | 0.388973 | 0.348477 |
| Test set F1 | 0.45446 | 0.398209 | 0.372987 | 0.34481 |
| Training Time | 120.844 | 44.9329 | 121.472 | 120.403 |
Although this model has performed better than the weighted model, it is still underperforming compared to the model trained on un-augmented data.
When researching data augmentation for unbalanced data, I came upon this article which uses the balanced_batch_generator from imblearn's Keras integration to try and rebalance the dataset. The "BalancedDataGenerator" code below is taken from this article: https://medium.com/analytics-vidhya/how-to-apply-data-augmentation-to-deal-with-unbalanced-datasets-in-20-lines-of-code-ada8521320c9
class BalancedDataGenerator(Sequence):
    """ImageDataGenerator + RandomOversampling"""

    def __init__(self, x, y, datagen, batch_size=32):
        # Keras augmentation pipeline applied on top of the re-balanced batches.
        self.datagen = datagen
        # Cap the batch size at the number of available samples.
        self.batch_size = min(batch_size, x.shape[0])
        datagen.fit(x)
        # balanced_batch_generator operates on 2-D data, so images are
        # flattened here and reshaped back per batch in __getitem__.
        # RandomOverSampler duplicates minority-class samples so every
        # batch is drawn from a balanced pool.
        self.gen, self.steps_per_epoch = balanced_batch_generator(x.reshape(x.shape[0], -1), y, sampler=RandomOverSampler(), batch_size=self.batch_size, keep_sparse=True)
        # NOTE(review): this uses the *requested* batch_size, not the
        # possibly clamped self.batch_size -- the two can disagree when
        # batch_size > len(x); confirm intended.
        self._shape = (self.steps_per_epoch * batch_size, *x.shape[1:])

    def __len__(self):
        # Number of batches per epoch, as reported by the balanced generator.
        return self.steps_per_epoch

    def __getitem__(self, idx):
        # idx is ignored: batches are drawn sequentially from the
        # underlying generator, so this Sequence is not truly random-access.
        x_batch, y_batch = self.gen.__next__()
        x_batch = x_batch.reshape(-1, *self._shape[1:])
        # Augment the re-balanced batch and return one (x, y) pair.
        return self.datagen.flow(x_batch, y_batch, batch_size=self.batch_size).next()
# Build the balanced, augmented generator over the training split.
balanced_gen = BalancedDataGenerator(X_train, y_train_wide, datagen, batch_size=128)
steps_per_epoch = balanced_gen.steps_per_epoch
best_weights_filepath = './best_weights_model_5.hdf5'
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_accuracy",
                      save_best_only=True, save_weights_only=False)
start_time = time.time()
# FIX: fit_generator is deprecated; Model.fit accepts a keras Sequence
# directly.  shuffle is dropped because Keras ignores it for generator
# input (batch order is controlled by the generator itself).
history = model.fit(balanced_gen,
                    steps_per_epoch=steps_per_epoch,
                    validation_data=(X_valid, y_valid_wide),
                    epochs=epochs,
                    verbose=1,
                    callbacks=[mcp])
end_time = time.time()
# Restore the best checkpoint before evaluating.
model.load_weights(best_weights_filepath)
Epoch 1/20 338/338 [==============================] - 11s 32ms/step - loss: 0.5258 - accuracy: 0.4003 - val_loss: 0.4738 - val_accuracy: 0.5803 Epoch 2/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5195 - accuracy: 0.4090 - val_loss: 0.4657 - val_accuracy: 0.6014 Epoch 3/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5191 - accuracy: 0.4093 - val_loss: 0.4781 - val_accuracy: 0.5984 Epoch 4/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5173 - accuracy: 0.4114 - val_loss: 0.4851 - val_accuracy: 0.6003 Epoch 5/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5171 - accuracy: 0.4120 - val_loss: 0.4796 - val_accuracy: 0.6171 Epoch 6/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5168 - accuracy: 0.4109 - val_loss: 0.4762 - val_accuracy: 0.6052 Epoch 7/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5166 - accuracy: 0.4141 - val_loss: 0.4911 - val_accuracy: 0.5636 Epoch 8/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5166 - accuracy: 0.4141 - val_loss: 0.4776 - val_accuracy: 0.5926 Epoch 9/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5158 - accuracy: 0.4150 - val_loss: 0.5147 - val_accuracy: 0.5493 Epoch 10/20 338/338 [==============================] - 10s 28ms/step - loss: 0.5153 - accuracy: 0.4148 - val_loss: 0.4847 - val_accuracy: 0.5955 Epoch 11/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5154 - accuracy: 0.4150 - val_loss: 0.4761 - val_accuracy: 0.6213 Epoch 12/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5156 - accuracy: 0.4140 - val_loss: 0.4859 - val_accuracy: 0.6095 Epoch 13/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5150 - accuracy: 0.4159 - val_loss: 0.4782 - val_accuracy: 0.6156 Epoch 14/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5150 - accuracy: 0.4162 - val_loss: 0.4779 - 
val_accuracy: 0.5993 Epoch 15/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5148 - accuracy: 0.4157 - val_loss: 0.4810 - val_accuracy: 0.6038 Epoch 16/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5146 - accuracy: 0.4174 - val_loss: 0.4733 - val_accuracy: 0.5882 Epoch 17/20 338/338 [==============================] - 10s 29ms/step - loss: 0.5145 - accuracy: 0.4167 - val_loss: 0.4781 - val_accuracy: 0.5835 Epoch 18/20 338/338 [==============================] - 10s 30ms/step - loss: 0.5144 - accuracy: 0.4173 - val_loss: 0.4833 - val_accuracy: 0.5754 Epoch 19/20 338/338 [==============================] - 11s 32ms/step - loss: 0.5146 - accuracy: 0.4193 - val_loss: 0.4873 - val_accuracy: 0.6109 Epoch 20/20 338/338 [==============================] - 11s 31ms/step - loss: 0.5143 - accuracy: 0.4166 - val_loss: 0.4773 - val_accuracy: 0.5946
# Store the trained model and its history for later comparison/plots.
models['model_5'] = model
models['model_5_history'] = history
plot_loss('model_5_history', 'Model 5')
plot_accuracy('model_5_history', 'Model 5')
# Evaluate on the held-out validation and test sets.
# model.predict_classes was deprecated and removed in TF 2.6; taking
# np.argmax over the softmax output is the supported equivalent.
eval_v = metrics.classification_report(y_valid, np.argmax(model.predict(X_valid), axis=-1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(model.predict(X_test), axis=-1), output_dict=True)
record_results('Model 5', 'Data augmentation using BalancedDataGenerator')
CNN_results
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | |
|---|---|---|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet | Previous model architecture with data augmenta... | Data augmentation without manually set class w... | Data augmentation using BalancedDataGenerator |
| Sample size | 0.1 | 1 | 1 | 1 | 1 |
| Epochs | 20 | 20 | 20 | 20 | 20 |
| Validation set Accuracy | 0.624567 | 0.695065 | 0.640693 | 0.680519 | 0.621299 |
| Test set Accuracy | 0.662359 | 0.68948 | 0.629121 | 0.672754 | 0.617081 |
| Validation set F1 | 0.391548 | 0.411632 | 0.388973 | 0.348477 | 0.403851 |
| Test set F1 | 0.45446 | 0.398209 | 0.372987 | 0.34481 | 0.406119 |
| Training Time | 120.844 | 44.9329 | 121.472 | 120.403 | 199.156 |
This worked quite well comparatively. Although the accuracy score is lower than Model 3 and Model 4, the F1 score on both the Test and Validation set is high. This was very slow to run though, with training time taking over 4 times longer than Model 2, which has relatively comparable F1 scores.
I then decided to try rebalancing the dataset with random undersampling. As I am using the entire dataset to train, I am hoping that undersampling does not affect accuracy or introduce new biases.
from imblearn.under_sampling import RandomUnderSampler
import math

# Rebalance the training set by randomly dropping majority-class samples.
rus = RandomUnderSampler()
# fit_sample was deprecated and removed in imbalanced-learn (0.8+);
# fit_resample is the supported name. Images are flattened to 2-D for
# imblearn, then reshaped back to (H, W, C) below.
X_train_rus, y_train_rus = rus.fit_resample(X_train.reshape(len(X_train), math.prod(input_shape)), y_train)
X_train_rus, y_train_rus = shuffle(X_train_rus, y_train_rus)
X_train_rus = X_train_rus.reshape(len(X_train_rus), input_shape[0], input_shape[1], input_shape[2])
# One-hot encode the labels for the softmax output layer.
y_train_rus_wide = tensorflow.keras.utils.to_categorical(y_train_rus, num_classes)
# Visual check that the classes are now balanced.
seaborn.countplot(y_train_rus)
<AxesSubplot:ylabel='count'>
# Compare sample counts before and after undersampling.
X_train_rus.shape, X_train.shape
((2880, 32, 32, 1), (23100, 32, 32, 1))
The dataset is now balanced.
# Fit augmentation statistics on the undersampled training set.
datagen.fit(X_train_rus)
best_weights_filepath = './best_weights_model_6.hdf5'
# Keep only the best epoch's model (by validation accuracy) on disk.
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_accuracy",
                      save_best_only=True, save_weights_only=False)
start_time = time.time()
# Model.fit accepts generators directly; fit_generator was deprecated in
# TF 2.1 and removed in 2.6. `shuffle` is ignored for generator input,
# so it is dropped here.
history = model.fit(datagen.flow(X_train_rus, y_train_rus_wide, batch_size=batch_size),
                    steps_per_epoch=len(X_train_rus) / batch_size,
                    validation_data=(X_valid, y_valid_wide),
                    epochs=epochs,
                    verbose=1,
                    callbacks=[mcp])
end_time = time.time()
# Restore the best checkpointed weights before evaluation.
model.load_weights(best_weights_filepath)
Epoch 1/20 22/22 [==============================] - 1s 48ms/step - loss: 0.5154 - accuracy: 0.4156 - val_loss: 0.5025 - val_accuracy: 0.5700 Epoch 2/20 22/22 [==============================] - 1s 49ms/step - loss: 0.5174 - accuracy: 0.4212 - val_loss: 0.4736 - val_accuracy: 0.6040 Epoch 3/20 22/22 [==============================] - 1s 48ms/step - loss: 0.5178 - accuracy: 0.4108 - val_loss: 0.5156 - val_accuracy: 0.5309 Epoch 4/20 22/22 [==============================] - 1s 46ms/step - loss: 0.5163 - accuracy: 0.4177 - val_loss: 0.4870 - val_accuracy: 0.5671 Epoch 5/20 22/22 [==============================] - 1s 47ms/step - loss: 0.5176 - accuracy: 0.4125 - val_loss: 0.5072 - val_accuracy: 0.5326 Epoch 6/20 22/22 [==============================] - 1s 48ms/step - loss: 0.5173 - accuracy: 0.4163 - val_loss: 0.4864 - val_accuracy: 0.5609 Epoch 7/20 22/22 [==============================] - 1s 49ms/step - loss: 0.5155 - accuracy: 0.4212 - val_loss: 0.4970 - val_accuracy: 0.5782 Epoch 8/20 22/22 [==============================] - 1s 48ms/step - loss: 0.5160 - accuracy: 0.4149 - val_loss: 0.4916 - val_accuracy: 0.5567 Epoch 9/20 22/22 [==============================] - 1s 47ms/step - loss: 0.5181 - accuracy: 0.4101 - val_loss: 0.4955 - val_accuracy: 0.5328 Epoch 10/20 22/22 [==============================] - 1s 47ms/step - loss: 0.5169 - accuracy: 0.4087 - val_loss: 0.5287 - val_accuracy: 0.4732 Epoch 11/20 22/22 [==============================] - 1s 46ms/step - loss: 0.5160 - accuracy: 0.4253 - val_loss: 0.4959 - val_accuracy: 0.5505 Epoch 12/20 22/22 [==============================] - 1s 47ms/step - loss: 0.5156 - accuracy: 0.4170 - val_loss: 0.4890 - val_accuracy: 0.5477 Epoch 13/20 22/22 [==============================] - 1s 51ms/step - loss: 0.5168 - accuracy: 0.4101 - val_loss: 0.4797 - val_accuracy: 0.5572 Epoch 14/20 22/22 [==============================] - 1s 46ms/step - loss: 0.5153 - accuracy: 0.4264 - val_loss: 0.5043 - val_accuracy: 0.5202 Epoch 15/20 22/22 
[==============================] - 1s 47ms/step - loss: 0.5173 - accuracy: 0.4170 - val_loss: 0.5068 - val_accuracy: 0.5626 Epoch 16/20 22/22 [==============================] - 1s 46ms/step - loss: 0.5149 - accuracy: 0.4153 - val_loss: 0.4953 - val_accuracy: 0.5207 Epoch 17/20 22/22 [==============================] - 1s 45ms/step - loss: 0.5149 - accuracy: 0.4181 - val_loss: 0.4999 - val_accuracy: 0.5053 Epoch 18/20 22/22 [==============================] - 1s 46ms/step - loss: 0.5164 - accuracy: 0.4194 - val_loss: 0.5178 - val_accuracy: 0.4798 Epoch 19/20 22/22 [==============================] - 1s 45ms/step - loss: 0.5179 - accuracy: 0.4097 - val_loss: 0.4754 - val_accuracy: 0.5609 Epoch 20/20 22/22 [==============================] - 1s 46ms/step - loss: 0.5136 - accuracy: 0.4236 - val_loss: 0.4800 - val_accuracy: 0.5602
# Store the trained model and its history for later comparison/plots.
models['model_6'] = model
models['model_6_history'] = history
plot_loss('model_6_history', 'Model 6')
plot_accuracy('model_6_history', 'Model 6')
# model.predict_classes was deprecated and removed in TF 2.6; taking
# np.argmax over the softmax output is the supported equivalent.
eval_v = metrics.classification_report(y_valid, np.argmax(model.predict(X_valid), axis=-1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(model.predict(X_test), axis=-1), output_dict=True)
record_results('Model 6', 'Random UnderSampling to balance the data with Data Augmentation.')
CNN_results
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | |
|---|---|---|---|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet | Previous model architecture with data augmenta... | Data augmentation without manually set class w... | Data augmentation using BalancedDataGenerator | Random UnderSampling to balance the data with ... |
| Sample size | 0.1 | 1 | 1 | 1 | 1 | 1 |
| Epochs | 20 | 20 | 20 | 20 | 20 | 20 |
| Validation set Accuracy | 0.624567 | 0.695065 | 0.640693 | 0.680519 | 0.621299 | 0.603983 |
| Test set Accuracy | 0.662359 | 0.68948 | 0.629121 | 0.672754 | 0.617081 | 0.598901 |
| Validation set F1 | 0.391548 | 0.411632 | 0.388973 | 0.348477 | 0.403851 | 0.392735 |
| Test set F1 | 0.45446 | 0.398209 | 0.372987 | 0.34481 | 0.406119 | 0.390721 |
| Training Time | 120.844 | 44.9329 | 121.472 | 120.403 | 199.156 | 21.4401 |
best_weights_filepath = './best_weights_model_7.hdf5'
# Keep only the best epoch's model (by validation accuracy) on disk.
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_accuracy",
save_best_only=True, save_weights_only=False)
start_time = time.time()
# Train on the undersampled data WITHOUT augmentation. Note that
# validation_split carves the last 20% off the (already shuffled)
# training array, so the checkpoint's val_accuracy is measured on that
# internal split, not on the external X_valid set used for reporting.
history = model.fit(X_train_rus, y_train_rus_wide,
batch_size=batch_size,
epochs=epochs,
verbose = 1,
validation_split = 0.2,
shuffle=True,
callbacks=[mcp])
end_time = time.time()
# Restore the best checkpointed weights before evaluation.
model.load_weights(best_weights_filepath)
Epoch 1/20 18/18 [==============================] - 0s 24ms/step - loss: 0.5044 - accuracy: 0.4288 - val_loss: 0.5020 - val_accuracy: 0.4306 Epoch 2/20 18/18 [==============================] - 0s 20ms/step - loss: 0.4975 - accuracy: 0.4379 - val_loss: 0.4972 - val_accuracy: 0.4479 Epoch 3/20 18/18 [==============================] - 0s 21ms/step - loss: 0.4934 - accuracy: 0.4457 - val_loss: 0.4965 - val_accuracy: 0.4375 Epoch 4/20 18/18 [==============================] - 0s 22ms/step - loss: 0.4902 - accuracy: 0.4588 - val_loss: 0.5000 - val_accuracy: 0.4392 Epoch 5/20 18/18 [==============================] - 0s 20ms/step - loss: 0.4838 - accuracy: 0.4727 - val_loss: 0.4961 - val_accuracy: 0.4410 Epoch 6/20 18/18 [==============================] - 0s 19ms/step - loss: 0.4816 - accuracy: 0.4800 - val_loss: 0.5025 - val_accuracy: 0.4531 Epoch 7/20 18/18 [==============================] - 0s 20ms/step - loss: 0.4765 - accuracy: 0.4831 - val_loss: 0.4932 - val_accuracy: 0.4340 Epoch 8/20 18/18 [==============================] - 0s 20ms/step - loss: 0.4730 - accuracy: 0.5004 - val_loss: 0.4952 - val_accuracy: 0.4497 Epoch 9/20 18/18 [==============================] - 0s 20ms/step - loss: 0.4697 - accuracy: 0.5017 - val_loss: 0.4932 - val_accuracy: 0.4306 Epoch 10/20 18/18 [==============================] - 0s 18ms/step - loss: 0.4672 - accuracy: 0.5122 - val_loss: 0.4948 - val_accuracy: 0.4323 Epoch 11/20 18/18 [==============================] - 0s 20ms/step - loss: 0.4627 - accuracy: 0.5039 - val_loss: 0.4935 - val_accuracy: 0.4306 Epoch 12/20 18/18 [==============================] - 0s 21ms/step - loss: 0.4599 - accuracy: 0.5148 - val_loss: 0.4914 - val_accuracy: 0.4358 Epoch 13/20 18/18 [==============================] - 0s 21ms/step - loss: 0.4542 - accuracy: 0.5195 - val_loss: 0.4926 - val_accuracy: 0.4462 Epoch 14/20 18/18 [==============================] - 0s 20ms/step - loss: 0.4529 - accuracy: 0.5213 - val_loss: 0.5026 - val_accuracy: 0.4201 Epoch 15/20 18/18 
[==============================] - 0s 21ms/step - loss: 0.4492 - accuracy: 0.5330 - val_loss: 0.4889 - val_accuracy: 0.4514 Epoch 16/20 18/18 [==============================] - 0s 20ms/step - loss: 0.4445 - accuracy: 0.5360 - val_loss: 0.4917 - val_accuracy: 0.4601 Epoch 17/20 18/18 [==============================] - 0s 21ms/step - loss: 0.4399 - accuracy: 0.5308 - val_loss: 0.4948 - val_accuracy: 0.4531 Epoch 18/20 18/18 [==============================] - 0s 19ms/step - loss: 0.4379 - accuracy: 0.5469 - val_loss: 0.4880 - val_accuracy: 0.4618 Epoch 19/20 18/18 [==============================] - 0s 20ms/step - loss: 0.4337 - accuracy: 0.5543 - val_loss: 0.4932 - val_accuracy: 0.4635 Epoch 20/20 18/18 [==============================] - 0s 20ms/step - loss: 0.4308 - accuracy: 0.5525 - val_loss: 0.4945 - val_accuracy: 0.4531
# Store the trained model and its history for later comparison/plots.
models['model_7'] = model
models['model_7_history'] = history
plot_loss('model_7_history', 'Model 7')
plot_accuracy('model_7_history', 'Model 7')
# model.predict_classes was deprecated and removed in TF 2.6; taking
# np.argmax over the softmax output is the supported equivalent.
eval_v = metrics.classification_report(y_valid, np.argmax(model.predict(X_valid), axis=-1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(model.predict(X_test), axis=-1), output_dict=True)
record_results('Model 7', 'Random UnderSampling to balance the data with no Data Augmentation.')
CNN_results
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | Model 7 | |
|---|---|---|---|---|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet | Previous model architecture with data augmenta... | Data augmentation without manually set class w... | Data augmentation using BalancedDataGenerator | Random UnderSampling to balance the data with ... | Random UnderSampling to balance the data with ... |
| Sample size | 0.1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Epochs | 20 | 20 | 20 | 20 | 20 | 20 | 20 |
| Validation set Accuracy | 0.624567 | 0.695065 | 0.640693 | 0.680519 | 0.621299 | 0.603983 | 0.494372 |
| Test set Accuracy | 0.662359 | 0.68948 | 0.629121 | 0.672754 | 0.617081 | 0.598901 | 0.502909 |
| Validation set F1 | 0.391548 | 0.411632 | 0.388973 | 0.348477 | 0.403851 | 0.392735 | 0.356892 |
| Test set F1 | 0.45446 | 0.398209 | 0.372987 | 0.34481 | 0.406119 | 0.390721 | 0.366987 |
| Training Time | 120.844 | 44.9329 | 121.472 | 120.403 | 199.156 | 21.4401 | 7.38834 |
Undersampling makes the models very fast to train, but unfortunately we can see there has been a drop in both accuracy and F1 scores. Of the two undersampled models, the model with data augmentation performed much better than the unaugmented data.
In order to try to reduce the effects of overfitting, I decided to add some dropout to the model.
# LeNet-style CNN with dropout after each conv block and before the
# output layer, to combat the overfitting seen in earlier models.
model = Sequential()
model.add(Conv2D(filters=6, kernel_size=(3, 3),
                 activation='relu', input_shape=input_shape))
model.add(Dropout(0.2))
model.add(MaxPooling2D())
# input_shape is only meaningful on the first layer, so it is omitted here.
model.add(Conv2D(filters=16, kernel_size=(3, 3),
                 activation='relu'))
model.add(Dropout(0.2))
model.add(MaxPooling2D())
model.add(Flatten())
model.add(Dense(units=120, activation='relu'))
model.add(Dense(units=84, activation='relu'))
model.add(Dropout(0.5))
# Four-way softmax output, one unit per class.
model.add(Dense(units=4, activation='softmax'))
# BUG FIX: the original compiled with binary_crossentropy, which is the
# wrong loss for a 4-class softmax with one-hot labels and makes the
# reported 'accuracy' metric misleading; categorical_crossentropy is
# the correct multi-class loss. Optimizer stays the Keras default.
model.compile(loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()
Model: "sequential_45" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 30, 30, 6) 60 _________________________________________________________________ dropout (Dropout) (None, 30, 30, 6) 0 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 15, 15, 6) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 13, 13, 16) 880 _________________________________________________________________ dropout_1 (Dropout) (None, 13, 13, 16) 0 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 6, 6, 16) 0 _________________________________________________________________ flatten_43 (Flatten) (None, 576) 0 _________________________________________________________________ dense_172 (Dense) (None, 120) 69240 _________________________________________________________________ dense_173 (Dense) (None, 84) 10164 _________________________________________________________________ dropout_2 (Dropout) (None, 84) 0 _________________________________________________________________ dense_174 (Dense) (None, 4) 340 ================================================================= Total params: 80,684 Trainable params: 80,684 Non-trainable params: 0 _________________________________________________________________
best_weights_filepath = './best_weights_model_8.hdf5'
# Keep only the best epoch's model (by validation accuracy) on disk.
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_accuracy",
save_best_only=True, save_weights_only=False)
start_time = time.time()
# Train the dropout model on the undersampled data. validation_split
# carves the last 20% off the (already shuffled) training array for
# checkpoint selection; the external X_valid set is used for reporting.
history = model.fit(X_train_rus, y_train_rus_wide,
batch_size=batch_size,
epochs=epochs,
verbose = 1,
validation_split = 0.2,
shuffle=True,
callbacks=[mcp])
end_time = time.time()
# Restore the best checkpointed weights before evaluation.
model.load_weights(best_weights_filepath)
Epoch 1/20 18/18 [==============================] - 1s 28ms/step - loss: 0.6164 - accuracy: 0.2758 - val_loss: 0.5755 - val_accuracy: 0.2413 Epoch 2/20 18/18 [==============================] - 0s 21ms/step - loss: 0.5706 - accuracy: 0.2756 - val_loss: 0.5697 - val_accuracy: 0.2830 Epoch 3/20 18/18 [==============================] - 0s 21ms/step - loss: 0.5700 - accuracy: 0.2848 - val_loss: 0.5632 - val_accuracy: 0.3385 Epoch 4/20 18/18 [==============================] - 0s 22ms/step - loss: 0.5639 - accuracy: 0.3021 - val_loss: 0.5645 - val_accuracy: 0.3524 Epoch 5/20 18/18 [==============================] - 0s 21ms/step - loss: 0.5592 - accuracy: 0.3378 - val_loss: 0.5581 - val_accuracy: 0.3681 Epoch 6/20 18/18 [==============================] - 0s 21ms/step - loss: 0.5509 - accuracy: 0.3676 - val_loss: 0.5499 - val_accuracy: 0.3837 Epoch 7/20 18/18 [==============================] - 0s 21ms/step - loss: 0.5440 - accuracy: 0.3953 - val_loss: 0.5458 - val_accuracy: 0.3976 Epoch 8/20 18/18 [==============================] - 0s 23ms/step - loss: 0.5404 - accuracy: 0.3976 - val_loss: 0.5394 - val_accuracy: 0.3958 Epoch 9/20 18/18 [==============================] - 0s 21ms/step - loss: 0.5386 - accuracy: 0.3877 - val_loss: 0.5357 - val_accuracy: 0.3924 Epoch 10/20 18/18 [==============================] - 0s 23ms/step - loss: 0.5317 - accuracy: 0.4026 - val_loss: 0.5369 - val_accuracy: 0.3941 Epoch 11/20 18/18 [==============================] - 0s 21ms/step - loss: 0.5303 - accuracy: 0.4169 - val_loss: 0.5348 - val_accuracy: 0.3941 Epoch 12/20 18/18 [==============================] - 0s 22ms/step - loss: 0.5274 - accuracy: 0.4076 - val_loss: 0.5310 - val_accuracy: 0.3993 Epoch 13/20 18/18 [==============================] - 0s 23ms/step - loss: 0.5284 - accuracy: 0.4274 - val_loss: 0.5306 - val_accuracy: 0.3906 Epoch 14/20 18/18 [==============================] - 0s 22ms/step - loss: 0.5176 - accuracy: 0.4411 - val_loss: 0.5300 - val_accuracy: 0.3976 Epoch 15/20 18/18 
[==============================] - 0s 22ms/step - loss: 0.5176 - accuracy: 0.4348 - val_loss: 0.5274 - val_accuracy: 0.4010 Epoch 16/20 18/18 [==============================] - 0s 23ms/step - loss: 0.5215 - accuracy: 0.4377 - val_loss: 0.5266 - val_accuracy: 0.3906 Epoch 17/20 18/18 [==============================] - 0s 22ms/step - loss: 0.5114 - accuracy: 0.4562 - val_loss: 0.5218 - val_accuracy: 0.3993 Epoch 18/20 18/18 [==============================] - 0s 20ms/step - loss: 0.5080 - accuracy: 0.4627 - val_loss: 0.5247 - val_accuracy: 0.3924 Epoch 19/20 18/18 [==============================] - 0s 21ms/step - loss: 0.5088 - accuracy: 0.4658 - val_loss: 0.5226 - val_accuracy: 0.4132 Epoch 20/20 18/18 [==============================] - 0s 23ms/step - loss: 0.5067 - accuracy: 0.4743 - val_loss: 0.5213 - val_accuracy: 0.4045
# Store the trained model and its history for later comparison/plots.
models['model_8'] = model
models['model_8_history'] = history
plot_loss('model_8_history', 'Model 8')
plot_accuracy('model_8_history', 'Model 8')
Upon visual inspection, it seems that overfitting has been reduced; the training and validation curves follow similar trends.
# model.predict_classes was deprecated and removed in TF 2.6; taking
# np.argmax over the softmax output is the supported equivalent.
eval_v = metrics.classification_report(y_valid, np.argmax(model.predict(X_valid), axis=-1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(model.predict(X_test), axis=-1), output_dict=True)
record_results('Model 8', 'Previous architecture with added dropout')
CNN_results
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | Model 7 | Model 8 | |
|---|---|---|---|---|---|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet | Previous model architecture with data augmenta... | Data augmentation without manually set class w... | Data augmentation using BalancedDataGenerator | Random UnderSampling to balance the data with ... | Random UnderSampling to balance the data with ... | Previous architecture with added dropout |
| Sample size | 0.1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Epochs | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 |
| Validation set Accuracy | 0.624567 | 0.695065 | 0.640693 | 0.680519 | 0.621299 | 0.603983 | 0.494372 | 0.473939 |
| Test set Accuracy | 0.662359 | 0.68948 | 0.629121 | 0.672754 | 0.617081 | 0.598901 | 0.502909 | 0.475436 |
| Validation set F1 | 0.391548 | 0.411632 | 0.388973 | 0.348477 | 0.403851 | 0.392735 | 0.356892 | 0.345063 |
| Test set F1 | 0.45446 | 0.398209 | 0.372987 | 0.34481 | 0.406119 | 0.390721 | 0.366987 | 0.346666 |
| Training Time | 120.844 | 44.9329 | 121.472 | 120.403 | 199.156 | 21.4401 | 7.38834 | 8.53818 |
Unfortunately the dropout has not improved performance on the validation and test sets.
As undersampling was not giving the desired performance improvement, I decided to try oversampling with SMOTE.
from imblearn.over_sampling import SMOTE

# Rebalance by synthesising minority-class samples (SMOTE) instead of
# dropping majority-class ones.
sm = SMOTE(random_state=0)
# fit_sample was deprecated and removed in imbalanced-learn (0.8+);
# fit_resample is the supported name. Images are flattened to 2-D for
# imblearn, then reshaped back to (H, W, C) below.
X_train_smo, y_train_smo = sm.fit_resample(X_train.reshape(len(X_train), math.prod(input_shape)), y_train)
X_train_smo, y_train_smo = shuffle(X_train_smo, y_train_smo)
X_train_smo = X_train_smo.reshape(len(X_train_smo), input_shape[0], input_shape[1], input_shape[2])
# One-hot encode the labels for the softmax output layer.
y_train_smo_wide = tensorflow.keras.utils.to_categorical(y_train_smo, num_classes)
# Visual check that the classes are now balanced.
seaborn.countplot(y_train_smo)
<AxesSubplot:ylabel='count'>
# Show full (untruncated) description strings in the results table.
pd.set_option('display.max_colwidth', None)
CNN_results
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | Model 7 | Model 8 | |
|---|---|---|---|---|---|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet | Previous model architecture with data augmentation and class weights | Data augmentation without manually set class weights | Data augmentation using BalancedDataGenerator | Random UnderSampling to balance the data with Data Augmentation. | Random UnderSampling to balance the data with no Data Augmentation. | Previous architecture with added dropout |
| Sample size | 0.1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Epochs | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 |
| Validation set Accuracy | 0.624567 | 0.695065 | 0.640693 | 0.680519 | 0.621299 | 0.603983 | 0.494372 | 0.473939 |
| Test set Accuracy | 0.662359 | 0.68948 | 0.629121 | 0.672754 | 0.617081 | 0.598901 | 0.502909 | 0.475436 |
| Validation set F1 | 0.391548 | 0.411632 | 0.388973 | 0.348477 | 0.403851 | 0.392735 | 0.356892 | 0.345063 |
| Test set F1 | 0.45446 | 0.398209 | 0.372987 | 0.34481 | 0.406119 | 0.390721 | 0.366987 | 0.346666 |
| Training Time | 120.844 | 44.9329 | 121.472 | 120.403 | 199.156 | 21.4401 | 7.38834 | 8.53818 |
I initially ran this for 20 epochs like the other models, but loss on the validation set was still falling, and it didn't look overfit. I decided to give it another 30 epochs to see where it went.
# Validation loss was still falling at 20 epochs, so extend to 50.
epochs=50
best_weights_filepath = './best_weights_model_9.hdf5'
# Keep only the best epoch's model (by validation accuracy) on disk.
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_accuracy",
save_best_only=True, save_weights_only=False)
start_time = time.time()
# Train on the SMOTE-oversampled data; validation_split carves the last
# 20% off the (already shuffled) training array for checkpoint selection.
# NOTE(review): synthetic SMOTE samples interpolated from training
# neighbours may leak into this internal validation split, inflating
# val_accuracy — confirm against the external X_valid results.
history = model.fit(X_train_smo, y_train_smo_wide,
batch_size=batch_size,
epochs=epochs,
verbose = 1,
validation_split = 0.2,
shuffle=True,
callbacks=[mcp])
end_time = time.time()
# Restore the best checkpointed weights before evaluation.
model.load_weights(best_weights_filepath)
Epoch 1/50 271/271 [==============================] - 6s 22ms/step - loss: 0.4833 - accuracy: 0.5057 - val_loss: 0.4447 - val_accuracy: 0.5647 Epoch 2/50 271/271 [==============================] - 6s 22ms/step - loss: 0.4416 - accuracy: 0.5693 - val_loss: 0.4094 - val_accuracy: 0.6336 Epoch 3/50 271/271 [==============================] - 6s 22ms/step - loss: 0.4083 - accuracy: 0.6105 - val_loss: 0.3863 - val_accuracy: 0.6552 Epoch 4/50 271/271 [==============================] - 6s 22ms/step - loss: 0.3839 - accuracy: 0.6341 - val_loss: 0.3605 - val_accuracy: 0.6769 Epoch 5/50 271/271 [==============================] - 7s 25ms/step - loss: 0.3664 - accuracy: 0.6571 - val_loss: 0.3403 - val_accuracy: 0.7005 Epoch 6/50 271/271 [==============================] - 9s 33ms/step - loss: 0.3519 - accuracy: 0.6711 - val_loss: 0.3313 - val_accuracy: 0.7041 Epoch 7/50 271/271 [==============================] - 13s 48ms/step - loss: 0.3416 - accuracy: 0.6792 - val_loss: 0.3146 - val_accuracy: 0.7173 Epoch 8/50 271/271 [==============================] - 34s 125ms/step - loss: 0.3318 - accuracy: 0.6888 - val_loss: 0.3496 - val_accuracy: 0.6542 Epoch 9/50 271/271 [==============================] - 56s 208ms/step - loss: 0.3250 - accuracy: 0.6989 - val_loss: 0.2987 - val_accuracy: 0.7220 Epoch 10/50 271/271 [==============================] - 56s 205ms/step - loss: 0.3191 - accuracy: 0.7051 - val_loss: 0.2940 - val_accuracy: 0.7235 Epoch 11/50 271/271 [==============================] - 38s 139ms/step - loss: 0.3133 - accuracy: 0.7100 - val_loss: 0.3074 - val_accuracy: 0.7060 Epoch 12/50 271/271 [==============================] - 30s 110ms/step - loss: 0.3086 - accuracy: 0.7134 - val_loss: 0.2906 - val_accuracy: 0.7317 Epoch 13/50 271/271 [==============================] - 30s 113ms/step - loss: 0.3059 - accuracy: 0.7168 - val_loss: 0.3144 - val_accuracy: 0.6947 Epoch 14/50 271/271 [==============================] - 37s 137ms/step - loss: 0.3012 - accuracy: 0.7207 - val_loss: 0.2814 
- val_accuracy: 0.7380 Epoch 15/50 271/271 [==============================] - 40s 146ms/step - loss: 0.2972 - accuracy: 0.7221 - val_loss: 0.3246 - val_accuracy: 0.6799 Epoch 16/50 271/271 [==============================] - 40s 146ms/step - loss: 0.2944 - accuracy: 0.7273 - val_loss: 0.2739 - val_accuracy: 0.7455 Epoch 17/50 271/271 [==============================] - 40s 148ms/step - loss: 0.2922 - accuracy: 0.7294 - val_loss: 0.2794 - val_accuracy: 0.7363 Epoch 18/50 271/271 [==============================] - 34s 126ms/step - loss: 0.2908 - accuracy: 0.7320 - val_loss: 0.2854 - val_accuracy: 0.7205 Epoch 19/50 271/271 [==============================] - 34s 125ms/step - loss: 0.2852 - accuracy: 0.7362 - val_loss: 0.4454 - val_accuracy: 0.5930 Epoch 20/50 271/271 [==============================] - 34s 125ms/step - loss: 0.2850 - accuracy: 0.7393 - val_loss: 0.2785 - val_accuracy: 0.7315 Epoch 21/50 271/271 [==============================] - 36s 131ms/step - loss: 0.2825 - accuracy: 0.7374 - val_loss: 0.3087 - val_accuracy: 0.6988 Epoch 22/50 271/271 [==============================] - 40s 146ms/step - loss: 0.2806 - accuracy: 0.7395 - val_loss: 0.2642 - val_accuracy: 0.7497 Epoch 23/50 271/271 [==============================] - 39s 143ms/step - loss: 0.2787 - accuracy: 0.7446 - val_loss: 0.2984 - val_accuracy: 0.7101 Epoch 24/50 271/271 [==============================] - 35s 129ms/step - loss: 0.2783 - accuracy: 0.7418 - val_loss: 0.2853 - val_accuracy: 0.7257 Epoch 25/50 271/271 [==============================] - 39s 144ms/step - loss: 0.2771 - accuracy: 0.7442 - val_loss: 0.2901 - val_accuracy: 0.7210 Epoch 26/50 271/271 [==============================] - 62s 228ms/step - loss: 0.2762 - accuracy: 0.7451 - val_loss: 0.2672 - val_accuracy: 0.7490 Epoch 27/50 271/271 [==============================] - 56s 206ms/step - loss: 0.2736 - accuracy: 0.7487 - val_loss: 0.2785 - val_accuracy: 0.7320 Epoch 28/50 271/271 [==============================] - 35s 129ms/step - loss: 
0.2745 - accuracy: 0.7478 - val_loss: 0.2652 - val_accuracy: 0.7486 Epoch 29/50 271/271 [==============================] - 30s 112ms/step - loss: 0.2717 - accuracy: 0.7487 - val_loss: 0.2652 - val_accuracy: 0.7525 Epoch 30/50 271/271 [==============================] - 30s 111ms/step - loss: 0.2722 - accuracy: 0.7484 - val_loss: 0.3056 - val_accuracy: 0.7038 Epoch 31/50 271/271 [==============================] - 30s 111ms/step - loss: 0.2698 - accuracy: 0.7514 - val_loss: 0.3737 - val_accuracy: 0.6539 Epoch 32/50 271/271 [==============================] - 33s 121ms/step - loss: 0.2711 - accuracy: 0.7508 - val_loss: 0.2703 - val_accuracy: 0.7412 Epoch 33/50 271/271 [==============================] - 37s 138ms/step - loss: 0.2690 - accuracy: 0.7485 - val_loss: 0.3286 - val_accuracy: 0.6852 Epoch 34/50 271/271 [==============================] - 39s 144ms/step - loss: 0.2674 - accuracy: 0.7522 - val_loss: 0.3298 - val_accuracy: 0.6905 Epoch 35/50 271/271 [==============================] - 41s 151ms/step - loss: 0.2662 - accuracy: 0.7550 - val_loss: 0.3774 - val_accuracy: 0.6447 Epoch 36/50 271/271 [==============================] - 39s 145ms/step - loss: 0.2653 - accuracy: 0.7540 - val_loss: 0.3580 - val_accuracy: 0.6559 Epoch 37/50 271/271 [==============================] - 56s 207ms/step - loss: 0.2681 - accuracy: 0.7523 - val_loss: 0.2919 - val_accuracy: 0.7164 Epoch 38/50 271/271 [==============================] - 60s 222ms/step - loss: 0.2639 - accuracy: 0.7566 - val_loss: 0.3199 - val_accuracy: 0.6959 Epoch 39/50 271/271 [==============================] - 44s 161ms/step - loss: 0.2650 - accuracy: 0.7558 - val_loss: 0.2537 - val_accuracy: 0.7582 Epoch 40/50 271/271 [==============================] - 30s 110ms/step - loss: 0.2632 - accuracy: 0.7560 - val_loss: 0.3071 - val_accuracy: 0.7085 Epoch 41/50 271/271 [==============================] - 30s 111ms/step - loss: 0.2641 - accuracy: 0.7551 - val_loss: 0.3518 - val_accuracy: 0.6771 Epoch 42/50 271/271 
[==============================] - 30s 111ms/step - loss: 0.2640 - accuracy: 0.7571 - val_loss: 0.3216 - val_accuracy: 0.6969 Epoch 43/50 271/271 [==============================] - 30s 111ms/step - loss: 0.2640 - accuracy: 0.7559 - val_loss: 0.2670 - val_accuracy: 0.7467 Epoch 44/50 271/271 [==============================] - 30s 112ms/step - loss: 0.2630 - accuracy: 0.7560 - val_loss: 0.3568 - val_accuracy: 0.6703 Epoch 45/50 271/271 [==============================] - 30s 111ms/step - loss: 0.2620 - accuracy: 0.7600 - val_loss: 0.3010 - val_accuracy: 0.7151 Epoch 46/50 271/271 [==============================] - 34s 124ms/step - loss: 0.2636 - accuracy: 0.7572 - val_loss: 0.3277 - val_accuracy: 0.6908 Epoch 47/50 271/271 [==============================] - 33s 124ms/step - loss: 0.2627 - accuracy: 0.7569 - val_loss: 0.3242 - val_accuracy: 0.6981 Epoch 48/50 271/271 [==============================] - 34s 125ms/step - loss: 0.2622 - accuracy: 0.7596 - val_loss: 0.3776 - val_accuracy: 0.6614 Epoch 49/50 271/271 [==============================] - 34s 126ms/step - loss: 0.2622 - accuracy: 0.7580 - val_loss: 0.4864 - val_accuracy: 0.5983 Epoch 50/50 271/271 [==============================] - 33s 123ms/step - loss: 0.2620 - accuracy: 0.7592 - val_loss: 0.3922 - val_accuracy: 0.6521
# Store the trained model and its history for later comparison/plots.
models['model_9'] = model
models['model_9_history'] = history
plot_loss('model_9_history', 'Model 9')
plot_accuracy('model_9_history', 'Model 9')
# model.predict_classes was deprecated and removed in TF 2.6; taking
# np.argmax over the softmax output is the supported equivalent.
eval_v = metrics.classification_report(y_valid, np.argmax(model.predict(X_valid), axis=-1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(model.predict(X_test), axis=-1), output_dict=True)
record_results('Model 9', 'SMOTE, no data augmentation, no dropout')
CNN_results
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | Model 7 | Model 8 | Model 9 | |
|---|---|---|---|---|---|---|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet | Previous model architecture with data augmentation and class weights | Data augmentation without manually set class weights | Data augmentation using BalancedDataGenerator | Random UnderSampling to balance the data with Data Augmentation. | Random UnderSampling to balance the data with no Data Augmentation. | Previous architecture with added dropout | SMOTE, no data augmentation, no dropout |
| Sample size | 0.1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Epochs | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 50 |
| Validation set Accuracy | 0.624567 | 0.695065 | 0.640693 | 0.680519 | 0.621299 | 0.603983 | 0.494372 | 0.473939 | 0.566234 |
| Test set Accuracy | 0.662359 | 0.68948 | 0.629121 | 0.672754 | 0.617081 | 0.598901 | 0.502909 | 0.475436 | 0.567469 |
| Validation set F1 | 0.391548 | 0.411632 | 0.388973 | 0.348477 | 0.403851 | 0.392735 | 0.356892 | 0.345063 | 0.407256 |
| Test set F1 | 0.45446 | 0.398209 | 0.372987 | 0.34481 | 0.406119 | 0.390721 | 0.366987 | 0.346666 | 0.410564 |
| Training Time | 120.844 | 44.9329 | 121.472 | 120.403 | 199.156 | 21.4401 | 7.38834 | 8.53818 | 1684.7 |
SMOTE has outperformed the undersampled data, giving us the highest F1 score we have seen for the test set. However, training was extremely slow.
In order to give the model a better chance of capturing the direction of the rocket in motion, I will stack images together to create sequential input to train on. I have decided to stack three images at a time.
num_classes = 4
img_height, img_width = 32, 32
# Three consecutive greyscale frames are stacked into the channel axis
# so the network can see the rocket's motion.
input_shape = (32, 32, 3)
# Initialise arrays for data storage (np.float is deprecated; use the
# builtin float, which numpy maps to float64).
X_data = np.ndarray((0, input_shape[0], input_shape[1], input_shape[2]), dtype=float)
y_data = np.ndarray(0, dtype=float)
# Generate filenames from the data folder and do sampling
image_filenames = [train_data_dir+i for i in os.listdir(train_data_dir) if not i.startswith('.')] # use this for full dataset
#print(image_filenames)
##uncomment to sample
#image_filenames = random.sample(image_filenames, int(len(image_filenames)*sample_rate))
# Create a data array for image data
count = len(image_filenames)
X_data_part = np.ndarray((count, input_shape[0], input_shape[1], input_shape[2]), dtype=float)
# Iterate through the filenames; for sample i, load frames i, i+1, i+2
# (clamped to the final frame at the end of the sequence), resize to
# 32x32 greyscale, and stack them as channels.
n_files = len(image_filenames)
for i in range(n_files):
    frames = []
    for j in range(3):
        idx = min(i + j, n_files - 1)
        image = cv2.imread(image_filenames[idx], cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, (img_height, img_width), interpolation=cv2.INTER_CUBIC)
        frames.append(image)
    # BUG FIX: the original did np.stack(images, axis=0).reshape(32, 32, 3),
    # which scrambles pixels across frames instead of placing each frame in
    # its own channel; stacking on the last axis gives the intended (H, W, 3).
    # Normalise to [0, 1] at the same time.
    X_data_part[i] = np.stack(frames, axis=-1) / 255
    # Label comes from the last frame of the stack; the class id is encoded
    # in the filename at position [-6].
    # BUG FIX: the original clamped branch read `target - int(...)` (a no-op
    # subtraction), silently reusing the previous iteration's label.
    label_idx = min(i + 2, n_files - 1)
    y_data = np.append(y_data, int(image_filenames[label_idx][-6]))
# Report the true number of samples processed (the original printed i+1,
# overcounting by one).
print('Processed {} images'.format(n_files))
# Append the part to the overall data array
X_data = np.append(X_data, X_data_part, axis=0)
print("Data shape: {}".format(X_data.shape))
print(X_data_part[-1].shape)
Processed 41252 images Data shape: (41251, 32, 32, 3) (32, 32, 3)
# Hold out 30% of the data as the test set, then carve a validation set
# (20% of the remainder) out of the training portion.
X_train_plus_valid, X_test, y_train_plus_valid, y_test = train_test_split(
    X_data, y_data, train_size=0.7, test_size=0.30, random_state=0)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_plus_valid, y_train_plus_valid, train_size=0.8, test_size=0.20, random_state=0)

# One-hot encode the integer labels for the softmax / cross-entropy head.
y_train_wide = to_categorical(y_train, num_classes)
y_valid_wide = to_categorical(y_valid, num_classes)
y_test_wide = to_categorical(y_test, num_classes)
# LeNet-style CNN over the stacked 3-frame (32x32x3) input.
model = Sequential()
model.add(Conv2D(filters=6, kernel_size=(3, 3),
                 activation='relu', input_shape=input_shape))
model.add(MaxPooling2D())
# input_shape is only meaningful on the first layer; Keras ignores it on
# hidden layers, so it is dropped here.
model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D())
model.add(Flatten())
model.add(Dense(units=120, activation='relu'))
model.add(Dense(units=84, activation='relu'))
model.add(Dense(units=4, activation='softmax'))
# BUG FIX: this is a 4-class, single-label problem with one-hot targets, so
# the loss must be categorical_crossentropy. binary_crossentropy scores each
# output independently and makes the reported 'accuracy' metric misleading.
# (The optimiser is left at Keras's default, RMSprop, as before.)
model.compile(loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()
Model: "sequential_3" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d_5 (Conv2D) (None, 30, 30, 6) 168 _________________________________________________________________ max_pooling2d_4 (MaxPooling2 (None, 15, 15, 6) 0 _________________________________________________________________ conv2d_6 (Conv2D) (None, 13, 13, 16) 880 _________________________________________________________________ max_pooling2d_5 (MaxPooling2 (None, 6, 6, 16) 0 _________________________________________________________________ flatten_3 (Flatten) (None, 576) 0 _________________________________________________________________ dense_8 (Dense) (None, 120) 69240 _________________________________________________________________ dense_9 (Dense) (None, 84) 10164 _________________________________________________________________ dense_10 (Dense) (None, 4) 340 ================================================================= Total params: 80,792 Trainable params: 80,792 Non-trainable params: 0 _________________________________________________________________
# Train model 10 for 20 epochs, checkpointing whenever validation accuracy
# improves, and time the run for the results table.
epochs = 20
# Set up the callback to save the best model based on validation data
best_weights_filepath = './best_weights_model_10.hdf5'
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_accuracy",
save_best_only=True, save_weights_only=False)
start_time = time.time()
# NOTE(review): batch_size is defined in an earlier cell (not visible here).
history = model.fit(X_train, y_train_wide,
batch_size=batch_size,
epochs=epochs,
verbose = 1,
validation_split = 0.2,
shuffle=True,
callbacks=[mcp])
end_time = time.time()
# Restore the best checkpoint before evaluating.
model.load_weights(best_weights_filepath)
Epoch 1/20 145/145 [==============================] - 4s 26ms/step - loss: 0.4043 - accuracy: 0.5698 - val_loss: 0.4275 - val_accuracy: 0.5242 Epoch 2/20 145/145 [==============================] - 4s 27ms/step - loss: 0.3906 - accuracy: 0.6224 - val_loss: 0.3889 - val_accuracy: 0.6433 Epoch 3/20 145/145 [==============================] - 4s 30ms/step - loss: 0.3840 - accuracy: 0.6350 - val_loss: 0.3790 - val_accuracy: 0.6457 Epoch 4/20 145/145 [==============================] - 5s 33ms/step - loss: 0.3784 - accuracy: 0.6475 - val_loss: 0.3752 - val_accuracy: 0.6584 Epoch 5/20 145/145 [==============================] - 5s 37ms/step - loss: 0.3741 - accuracy: 0.6552 - val_loss: 0.3711 - val_accuracy: 0.6660 Epoch 6/20 145/145 [==============================] - 5s 38ms/step - loss: 0.3694 - accuracy: 0.6585 - val_loss: 0.3702 - val_accuracy: 0.6680 Epoch 7/20 145/145 [==============================] - 5s 35ms/step - loss: 0.3650 - accuracy: 0.6634 - val_loss: 0.3751 - val_accuracy: 0.6552 Epoch 8/20 145/145 [==============================] - 4s 28ms/step - loss: 0.3615 - accuracy: 0.6659 - val_loss: 0.3628 - val_accuracy: 0.6753 Epoch 9/20 145/145 [==============================] - 4s 25ms/step - loss: 0.3577 - accuracy: 0.6679 - val_loss: 0.3691 - val_accuracy: 0.6732 Epoch 10/20 145/145 [==============================] - 3s 21ms/step - loss: 0.3544 - accuracy: 0.6710 - val_loss: 0.3704 - val_accuracy: 0.6660 Epoch 11/20 145/145 [==============================] - 3s 21ms/step - loss: 0.3508 - accuracy: 0.6721 - val_loss: 0.3617 - val_accuracy: 0.6786 Epoch 12/20 145/145 [==============================] - 3s 21ms/step - loss: 0.3479 - accuracy: 0.6769 - val_loss: 0.3753 - val_accuracy: 0.6703 Epoch 13/20 145/145 [==============================] - 3s 21ms/step - loss: 0.3451 - accuracy: 0.6756 - val_loss: 0.3547 - val_accuracy: 0.6764 Epoch 14/20 145/145 [==============================] - 3s 21ms/step - loss: 0.3419 - accuracy: 0.6788 - val_loss: 0.3551 - val_accuracy: 
0.6792 Epoch 15/20 145/145 [==============================] - 3s 21ms/step - loss: 0.3391 - accuracy: 0.6820 - val_loss: 0.3638 - val_accuracy: 0.6745 Epoch 16/20 145/145 [==============================] - 3s 21ms/step - loss: 0.3366 - accuracy: 0.6825 - val_loss: 0.3660 - val_accuracy: 0.6221 Epoch 17/20 145/145 [==============================] - 3s 21ms/step - loss: 0.3330 - accuracy: 0.6872 - val_loss: 0.3558 - val_accuracy: 0.6712 Epoch 18/20 145/145 [==============================] - 3s 22ms/step - loss: 0.3288 - accuracy: 0.6916 - val_loss: 0.3603 - val_accuracy: 0.6736 Epoch 19/20 145/145 [==============================] - 3s 22ms/step - loss: 0.3254 - accuracy: 0.6970 - val_loss: 0.3752 - val_accuracy: 0.6738 Epoch 20/20 145/145 [==============================] - 3s 22ms/step - loss: 0.3204 - accuracy: 0.7004 - val_loss: 0.3803 - val_accuracy: 0.6738
# Persist the trained sequential-input model in TensorFlow SavedModel format.
model.save("cnn_sequential.mod")
INFO:tensorflow:Assets written to: cnn_sequential.mod/assets
# Keep the model and its training history for later comparison and plotting.
models['model_10'] = model
models['model_10_history'] = history
plot_loss('model_10_history', 'Model 10')
plot_accuracy('model_10_history', 'Model 10')
# predict_classes() was removed in TF 2.6; argmax over the softmax
# probabilities yields the same class ids.
eval_v = metrics.classification_report(y_valid, np.argmax(model.predict(X_valid), axis=-1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(model.predict(X_test), axis=-1), output_dict=True)
record_results('Model 10', 'Sequential, no data augmentation, no dropout')
CNN_results
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | Model 7 | Model 8 | Model 9 | Model 10 | |
|---|---|---|---|---|---|---|---|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet | Previous model architecture with data augmentation and class weights | Data augmentation without manually set class weights | Data augmentation using BalancedDataGenerator | Random UnderSampling to balance the data with Data Augmentation. | Random UnderSampling to balance the data with no Data Augmentation. | Previous architecture with added dropout | SMOTE, no data augmentation, no dropout | Sequential, no data augmentation, no dropout |
| Sample size | 0.1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Epochs | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 50 | 20 |
| Validation set Accuracy | 0.624567 | 0.695065 | 0.640693 | 0.680519 | 0.621299 | 0.603983 | 0.494372 | 0.473939 | 0.566234 | 0.666667 |
| Test set Accuracy | 0.662359 | 0.68948 | 0.629121 | 0.672754 | 0.617081 | 0.598901 | 0.502909 | 0.475436 | 0.567469 | 0.681965 |
| Validation set F1 | 0.391548 | 0.411632 | 0.388973 | 0.348477 | 0.403851 | 0.392735 | 0.356892 | 0.345063 | 0.407256 | 0.344405 |
| Test set F1 | 0.45446 | 0.398209 | 0.372987 | 0.34481 | 0.406119 | 0.390721 | 0.366987 | 0.346666 | 0.410564 | 0.350917 |
| Training Time | 120.844 | 44.9329 | 121.472 | 120.403 | 199.156 | 21.4401 | 7.38834 | 8.53818 | 1684.7 | 74.6124 |
Although the test set accuracy is the highest we've seen, the F1 scores are low. The sequential input is not alleviating bias.
In order to hopefully raise F1 scores, we will oversample the sequential input to balance the dataset.
# Balance the training set by oversampling minority classes with SMOTE.
# SMOTE operates on 2-D input, so each 32x32x3 stack is flattened first.
sm = SMOTE(random_state=0)
# fit_sample() was deprecated in imbalanced-learn 0.4 and removed in 0.8;
# fit_resample() is the supported name with identical behaviour.
X_train_smo, y_train_smo = sm.fit_resample(X_train.reshape(len(X_train), math.prod(input_shape)), y_train)
X_train_smo, y_train_smo = shuffle(X_train_smo, y_train_smo)
# Restore the image shape and one-hot encode the rebalanced labels
# (to_categorical is already imported at the top of the file).
X_train_smo = X_train_smo.reshape(len(X_train_smo), input_shape[0], input_shape[1], input_shape[2])
y_train_smo_wide = to_categorical(y_train_smo, num_classes)
# Set up the callback to save the best model based on validation data
best_weights_filepath = './best_weights_model_11.hdf5'
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_accuracy",
save_best_only=True, save_weights_only=False)
start_time = time.time()
# NOTE(review): `model` still holds the weights trained as model 10, so this
# run fine-tunes those weights on the SMOTE data rather than training from a
# fresh initialisation. Confirm this is intended; otherwise rebuild the model
# before fitting.
history = model.fit(X_train_smo, y_train_smo_wide,
batch_size=batch_size,
epochs=epochs,
verbose = 1,
validation_split = 0.2,
shuffle=True,
callbacks=[mcp])
end_time = time.time()
# Reload the checkpoint with the best validation accuracy.
model.load_weights(best_weights_filepath)
Epoch 1/20 271/271 [==============================] - 5s 20ms/step - loss: 0.4464 - accuracy: 0.5566 - val_loss: 0.4717 - val_accuracy: 0.5155 Epoch 2/20 271/271 [==============================] - 6s 20ms/step - loss: 0.3948 - accuracy: 0.6274 - val_loss: 0.4695 - val_accuracy: 0.5264 Epoch 3/20 271/271 [==============================] - 6s 20ms/step - loss: 0.3595 - accuracy: 0.6669 - val_loss: 0.3592 - val_accuracy: 0.6555 Epoch 4/20 271/271 [==============================] - 6s 20ms/step - loss: 0.3271 - accuracy: 0.7025 - val_loss: 0.3142 - val_accuracy: 0.7125 Epoch 5/20 271/271 [==============================] - 6s 21ms/step - loss: 0.2988 - accuracy: 0.7275 - val_loss: 0.3703 - val_accuracy: 0.6619 Epoch 6/20 271/271 [==============================] - 6s 21ms/step - loss: 0.2765 - accuracy: 0.7497 - val_loss: 0.3192 - val_accuracy: 0.6887 Epoch 7/20 271/271 [==============================] - 6s 21ms/step - loss: 0.2534 - accuracy: 0.7705 - val_loss: 0.2704 - val_accuracy: 0.7465 Epoch 8/20 271/271 [==============================] - 6s 21ms/step - loss: 0.2356 - accuracy: 0.7881 - val_loss: 0.2459 - val_accuracy: 0.7737 Epoch 9/20 271/271 [==============================] - 6s 21ms/step - loss: 0.2186 - accuracy: 0.8050 - val_loss: 0.2343 - val_accuracy: 0.7855 Epoch 10/20 271/271 [==============================] - 6s 21ms/step - loss: 0.2056 - accuracy: 0.8152 - val_loss: 0.2307 - val_accuracy: 0.7874 Epoch 11/20 271/271 [==============================] - 6s 21ms/step - loss: 0.1945 - accuracy: 0.8246 - val_loss: 0.2271 - val_accuracy: 0.7886 Epoch 12/20 271/271 [==============================] - 6s 21ms/step - loss: 0.1835 - accuracy: 0.8343 - val_loss: 0.2208 - val_accuracy: 0.7923 Epoch 13/20 271/271 [==============================] - 6s 21ms/step - loss: 0.1748 - accuracy: 0.8396 - val_loss: 0.2599 - val_accuracy: 0.7651 Epoch 14/20 271/271 [==============================] - 6s 22ms/step - loss: 0.1669 - accuracy: 0.8490 - val_loss: 0.2066 - val_accuracy: 
0.8066 Epoch 15/20 271/271 [==============================] - 6s 21ms/step - loss: 0.1596 - accuracy: 0.8543 - val_loss: 0.2249 - val_accuracy: 0.7988 Epoch 16/20 271/271 [==============================] - 6s 21ms/step - loss: 0.1518 - accuracy: 0.8611 - val_loss: 0.2204 - val_accuracy: 0.8041 Epoch 17/20 271/271 [==============================] - 6s 21ms/step - loss: 0.1469 - accuracy: 0.8660 - val_loss: 0.2237 - val_accuracy: 0.7878 Epoch 18/20 271/271 [==============================] - 6s 21ms/step - loss: 0.1406 - accuracy: 0.8721 - val_loss: 0.2089 - val_accuracy: 0.8093 Epoch 19/20 271/271 [==============================] - 6s 21ms/step - loss: 0.1355 - accuracy: 0.8769 - val_loss: 0.2106 - val_accuracy: 0.8000 Epoch 20/20 271/271 [==============================] - 6s 21ms/step - loss: 0.1308 - accuracy: 0.8812 - val_loss: 0.2133 - val_accuracy: 0.8070
# Persist the SMOTE-balanced sequential model in SavedModel format.
model.save("cnn_balanced_sequential.mod")
INFO:tensorflow:Assets written to: cnn_balanced_sequential.mod/assets
# Keep the model and its training history for later comparison and plotting.
models['model_11'] = model
models['model_11_history'] = history
plot_loss('model_11_history', 'Model 11')
plot_accuracy('model_11_history', 'Model 11')
# predict_classes() was removed in TF 2.6; argmax over the softmax
# probabilities yields the same class ids.
eval_v = metrics.classification_report(y_valid, np.argmax(model.predict(X_valid), axis=-1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(model.predict(X_test), axis=-1), output_dict=True)
record_results('Model 11', 'Sequential, with SMOTE')
CNN_results
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | Model 7 | Model 8 | Model 9 | Model 10 | Model 11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet | Previous model architecture with data augmentation and class weights | Data augmentation without manually set class weights | Data augmentation using BalancedDataGenerator | Random UnderSampling to balance the data with Data Augmentation. | Random UnderSampling to balance the data with no Data Augmentation. | Previous architecture with added dropout | SMOTE, no data augmentation, no dropout | Sequential, no data augmentation, no dropout | Sequential, with SMOTE |
| Sample size | 0.1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Epochs | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 50 | 20 | 20 |
| Validation set Accuracy | 0.624567 | 0.695065 | 0.640693 | 0.680519 | 0.621299 | 0.603983 | 0.494372 | 0.473939 | 0.566234 | 0.666667 | 0.602251 |
| Test set Accuracy | 0.662359 | 0.68948 | 0.629121 | 0.672754 | 0.617081 | 0.598901 | 0.502909 | 0.475436 | 0.567469 | 0.681965 | 0.608436 |
| Validation set F1 | 0.391548 | 0.411632 | 0.388973 | 0.348477 | 0.403851 | 0.392735 | 0.356892 | 0.345063 | 0.407256 | 0.344405 | 0.384198 |
| Test set F1 | 0.45446 | 0.398209 | 0.372987 | 0.34481 | 0.406119 | 0.390721 | 0.366987 | 0.346666 | 0.410564 | 0.350917 | 0.390147 |
| Training Time | 120.844 | 44.9329 | 121.472 | 120.403 | 199.156 | 21.4401 | 7.38834 | 8.53818 | 1684.7 | 74.6124 | 113.332 |
Results are disappointing. There has been a small bump in the F1 scores, but they are still lower than previous models with single frame input.
Given the simplicity of the images we are presenting to the network, a pre-trained model is probably overkill, but out of curiosity I decided to try it out.
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential, Model
# Build the VGG16 convolutional base with ImageNet weights; include_top=False
# drops the original 1000-class classifier so we can attach our own head.
vgg16_model = VGG16(weights='imagenet', include_top=False, input_shape = input_shape)
# summary() prints its table as a side effect and returns None, so wrapping it
# in display() just printed a stray "None"; call it directly instead.
vgg16_model.summary()
Model: "vgg16" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, 32, 32, 3)] 0 _________________________________________________________________ block1_conv1 (Conv2D) (None, 32, 32, 64) 1792 _________________________________________________________________ block1_conv2 (Conv2D) (None, 32, 32, 64) 36928 _________________________________________________________________ block1_pool (MaxPooling2D) (None, 16, 16, 64) 0 _________________________________________________________________ block2_conv1 (Conv2D) (None, 16, 16, 128) 73856 _________________________________________________________________ block2_conv2 (Conv2D) (None, 16, 16, 128) 147584 _________________________________________________________________ block2_pool (MaxPooling2D) (None, 8, 8, 128) 0 _________________________________________________________________ block3_conv1 (Conv2D) (None, 8, 8, 256) 295168 _________________________________________________________________ block3_conv2 (Conv2D) (None, 8, 8, 256) 590080 _________________________________________________________________ block3_conv3 (Conv2D) (None, 8, 8, 256) 590080 _________________________________________________________________ block3_pool (MaxPooling2D) (None, 4, 4, 256) 0 _________________________________________________________________ block4_conv1 (Conv2D) (None, 4, 4, 512) 1180160 _________________________________________________________________ block4_conv2 (Conv2D) (None, 4, 4, 512) 2359808 _________________________________________________________________ block4_conv3 (Conv2D) (None, 4, 4, 512) 2359808 _________________________________________________________________ block4_pool (MaxPooling2D) (None, 2, 2, 512) 0 _________________________________________________________________ block5_conv1 (Conv2D) (None, 2, 2, 512) 2359808 _________________________________________________________________ 
block5_conv2 (Conv2D) (None, 2, 2, 512) 2359808 _________________________________________________________________ block5_conv3 (Conv2D) (None, 2, 2, 512) 2359808 _________________________________________________________________ block5_pool (MaxPooling2D) (None, 1, 1, 512) 0 ================================================================= Total params: 14,714,688 Trainable params: 14,714,688 Non-trainable params: 0 _________________________________________________________________
None
vgg16_last_layer = vgg16_model.output
# Build a small classifier head to put on top of the VGG16 base.
x1 = Flatten()(vgg16_last_layer)
x2 = Dense(256, activation='relu')(x1)
x3 = Dropout(0.5)(x2)
final_layer = Dense(num_classes, activation = 'softmax')(x3)
# Assemble the full model out of both parts
full_model = Model(vgg16_model.input, final_layer)
# Transplanting weights from a smaller model pre-trained on this specific
# problem might beat random initialisation for the head; left disabled.
#top_weights_filepath = './best_weights_notebook22.hdf5'
#old_model = keras.models.load_model(top_weights_filepath)
#full_model.layers[-1].set_weights(old_model.layers[-1].get_weights())
# Freeze the entire VGG16 base so only the new head is trained.
# (The original comment said "first 17 layers", but the loop freezes all of
# the base's layers.)
for layer in vgg16_model.layers:
    layer.trainable = False
# BUG FIX: categorical_crossentropy is the correct loss for a 4-class softmax
# with one-hot targets; binary_crossentropy scores each output independently
# and inflates the reported accuracy. (The original comment also mentioned an
# SGD/momentum optimiser, but the optimiser actually used is Adam.)
full_model.compile(loss='categorical_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])
full_model.summary()
Model: "model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, 32, 32, 3)] 0 _________________________________________________________________ block1_conv1 (Conv2D) (None, 32, 32, 64) 1792 _________________________________________________________________ block1_conv2 (Conv2D) (None, 32, 32, 64) 36928 _________________________________________________________________ block1_pool (MaxPooling2D) (None, 16, 16, 64) 0 _________________________________________________________________ block2_conv1 (Conv2D) (None, 16, 16, 128) 73856 _________________________________________________________________ block2_conv2 (Conv2D) (None, 16, 16, 128) 147584 _________________________________________________________________ block2_pool (MaxPooling2D) (None, 8, 8, 128) 0 _________________________________________________________________ block3_conv1 (Conv2D) (None, 8, 8, 256) 295168 _________________________________________________________________ block3_conv2 (Conv2D) (None, 8, 8, 256) 590080 _________________________________________________________________ block3_conv3 (Conv2D) (None, 8, 8, 256) 590080 _________________________________________________________________ block3_pool (MaxPooling2D) (None, 4, 4, 256) 0 _________________________________________________________________ block4_conv1 (Conv2D) (None, 4, 4, 512) 1180160 _________________________________________________________________ block4_conv2 (Conv2D) (None, 4, 4, 512) 2359808 _________________________________________________________________ block4_conv3 (Conv2D) (None, 4, 4, 512) 2359808 _________________________________________________________________ block4_pool (MaxPooling2D) (None, 2, 2, 512) 0 _________________________________________________________________ block5_conv1 (Conv2D) (None, 2, 2, 512) 2359808 _________________________________________________________________ 
block5_conv2 (Conv2D) (None, 2, 2, 512) 2359808 _________________________________________________________________ block5_conv3 (Conv2D) (None, 2, 2, 512) 2359808 _________________________________________________________________ block5_pool (MaxPooling2D) (None, 1, 1, 512) 0 _________________________________________________________________ flatten_4 (Flatten) (None, 512) 0 _________________________________________________________________ dense_11 (Dense) (None, 256) 131328 _________________________________________________________________ dropout_3 (Dropout) (None, 256) 0 _________________________________________________________________ dense_12 (Dense) (None, 4) 1028 ================================================================= Total params: 14,847,044 Trainable params: 132,356 Non-trainable params: 14,714,688 _________________________________________________________________
# Fine-tune only the new classifier head on the SMOTE-balanced training data.
epochs = 20
# Set up the callback to save the best model based on validation data - notebook 2.2 needs to be run first.
best_weights_filepath = './best_weights_model_12.hdf5'
# Checkpoints on val_loss here, unlike earlier models which used val_accuracy.
mcp = ModelCheckpoint(best_weights_filepath, monitor="val_loss",
save_best_only=True, save_weights_only=False)
start_time = time.time()
history = full_model.fit(X_train_smo, y_train_smo_wide,
batch_size=batch_size,
epochs=epochs,
verbose = 1,
validation_split = 0.2,
shuffle=True,
callbacks=[mcp])
end_time = time.time()
Epoch 1/20 271/271 [==============================] - 66s 242ms/step - loss: 0.5237 - accuracy: 0.3947 - val_loss: 0.5165 - val_accuracy: 0.4188 Epoch 2/20 271/271 [==============================] - 67s 247ms/step - loss: 0.5224 - accuracy: 0.4005 - val_loss: 0.5149 - val_accuracy: 0.4219 Epoch 3/20 271/271 [==============================] - 66s 245ms/step - loss: 0.5206 - accuracy: 0.4032 - val_loss: 0.5132 - val_accuracy: 0.4290 Epoch 4/20 271/271 [==============================] - 67s 247ms/step - loss: 0.5194 - accuracy: 0.4054 - val_loss: 0.5149 - val_accuracy: 0.4190 Epoch 5/20 271/271 [==============================] - 67s 248ms/step - loss: 0.5188 - accuracy: 0.4044 - val_loss: 0.5114 - val_accuracy: 0.4255 Epoch 6/20 271/271 [==============================] - 69s 253ms/step - loss: 0.5170 - accuracy: 0.4127 - val_loss: 0.5088 - val_accuracy: 0.4332 Epoch 7/20 271/271 [==============================] - 68s 252ms/step - loss: 0.5153 - accuracy: 0.4145 - val_loss: 0.5081 - val_accuracy: 0.4401 Epoch 8/20 271/271 [==============================] - 70s 259ms/step - loss: 0.5137 - accuracy: 0.4154 - val_loss: 0.5079 - val_accuracy: 0.4333 Epoch 9/20 271/271 [==============================] - 73s 268ms/step - loss: 0.5123 - accuracy: 0.4220 - val_loss: 0.5052 - val_accuracy: 0.4362 Epoch 10/20 271/271 [==============================] - 68s 253ms/step - loss: 0.5118 - accuracy: 0.4234 - val_loss: 0.5078 - val_accuracy: 0.4364 Epoch 11/20 271/271 [==============================] - 68s 252ms/step - loss: 0.5114 - accuracy: 0.4227 - val_loss: 0.5053 - val_accuracy: 0.4402 Epoch 12/20 271/271 [==============================] - 69s 256ms/step - loss: 0.5100 - accuracy: 0.4239 - val_loss: 0.5016 - val_accuracy: 0.4465 Epoch 13/20 271/271 [==============================] - 70s 260ms/step - loss: 0.5087 - accuracy: 0.4279 - val_loss: 0.5032 - val_accuracy: 0.4407 Epoch 14/20 271/271 [==============================] - 69s 255ms/step - loss: 0.5090 - accuracy: 0.4294 - 
val_loss: 0.5046 - val_accuracy: 0.4345 Epoch 15/20 271/271 [==============================] - 68s 251ms/step - loss: 0.5078 - accuracy: 0.4255 - val_loss: 0.5021 - val_accuracy: 0.4511 Epoch 16/20 271/271 [==============================] - 68s 251ms/step - loss: 0.5080 - accuracy: 0.4283 - val_loss: 0.5022 - val_accuracy: 0.4412 Epoch 17/20 271/271 [==============================] - 69s 255ms/step - loss: 0.5062 - accuracy: 0.4339 - val_loss: 0.4990 - val_accuracy: 0.4524 Epoch 18/20 271/271 [==============================] - 71s 262ms/step - loss: 0.5064 - accuracy: 0.4329 - val_loss: 0.4975 - val_accuracy: 0.4491 Epoch 19/20 271/271 [==============================] - 70s 257ms/step - loss: 0.5048 - accuracy: 0.4376 - val_loss: 0.4967 - val_accuracy: 0.4536 Epoch 20/20 271/271 [==============================] - 71s 261ms/step - loss: 0.5034 - accuracy: 0.4370 - val_loss: 0.4946 - val_accuracy: 0.4567
# Restore the best checkpoint, store the model/history, plot and record results.
full_model.load_weights(best_weights_filepath)
models['model_12'] = full_model
models['model_12_history'] = history
plot_loss('model_12_history', 'Model 12')
plot_accuracy('model_12_history', 'Model 12')
# Functional models have no predict_classes(); argmax over the softmax
# probabilities gives the predicted class ids.
eval_v = metrics.classification_report(y_valid, np.argmax(full_model.predict(X_valid), axis=-1), output_dict=True)
eval_t = metrics.classification_report(y_test, np.argmax(full_model.predict(X_test), axis=-1), output_dict=True)
# (Label typo: 'VGA' should read 'VGG' — kept to match the recorded table.)
record_results('Model 12', 'Sequential, with SMOTE and VGA pre-trained')
CNN_results
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | Model 7 | Model 8 | Model 9 | Model 10 | Model 11 | Model 12 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Model Type | Unmodified input data and 1 conv layer | 32*32 resized and normalised images on LeNet | Previous model architecture with data augmentation and class weights | Data augmentation without manually set class weights | Data augmentation using BalancedDataGenerator | Random UnderSampling to balance the data with Data Augmentation. | Random UnderSampling to balance the data with no Data Augmentation. | Previous architecture with added dropout | SMOTE, no data augmentation, no dropout | Sequential, no data augmentation, no dropout | Sequential, with SMOTE | Sequential, with SMOTE and VGA pre-trained |
| Sample size | 0.1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| Epochs | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 50 | 20 | 20 | 20 |
| Validation set Accuracy | 0.624567 | 0.695065 | 0.640693 | 0.680519 | 0.621299 | 0.603983 | 0.494372 | 0.473939 | 0.566234 | 0.666667 | 0.602251 | 0.399654 |
| Test set Accuracy | 0.662359 | 0.68948 | 0.629121 | 0.672754 | 0.617081 | 0.598901 | 0.502909 | 0.475436 | 0.567469 | 0.681965 | 0.608436 | 0.404008 |
| Validation set F1 | 0.391548 | 0.411632 | 0.388973 | 0.348477 | 0.403851 | 0.392735 | 0.356892 | 0.345063 | 0.407256 | 0.344405 | 0.384198 | 0.260841 |
| Test set F1 | 0.45446 | 0.398209 | 0.372987 | 0.34481 | 0.406119 | 0.390721 | 0.366987 | 0.346666 | 0.410564 | 0.350917 | 0.390147 | 0.269034 |
| Training Time | 120.844 | 44.9329 | 121.472 | 120.403 | 199.156 | 21.4401 | 7.38834 | 8.53818 | 1684.7 | 74.6124 | 113.332 | 1375.32 |
The pre-trained model actually worsened performance considerably, and it was very slow.
There was no definitive winner per se, but I think model 9 presents the best-rounded model. It has a fair accuracy score (above 0.5) and F1 scores at the higher end of what we have seen. It also, incidentally, had the longest training time. I have decided to use this model to compare against my reinforcement learning model.
# Reload model 9's checkpoint and export it as the chosen CNN baseline.
# NOTE(review): at this point `model` holds the 3-channel sequential-input
# architecture, while these weights were saved from model 9 (single-frame
# input). load_weights will fail or mis-map if the architectures differ —
# confirm the correct model object is in scope here.
best_weights_filepath = './best_weights_model_9.hdf5'
model.load_weights(best_weights_filepath)
model.save("cnn_choice.mod")
INFO:tensorflow:Assets written to: cnn_choice.mod/assets
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import gym
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam
# We use keras-rl2 a reinforcement learning package that builds on top of keras and openAI gym (pip install keras-rl2)
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
import LunarEirLander
from rl.callbacks import FileLogger, ModelIntervalCheckpoint
import time
# Create the custom LunarLander-style environment and read its action count.
env = LunarEirLander.LunarEirLander()
nb_actions = env.action_space.n
# DataFrame collecting the RL experiment settings/results, one column per model.
rl = pd.DataFrame(index=['Window Length', 'Architecture', 'Training Steps', 'Memory Limit', 'Log Interval', 'Average Reward'])
# Structures to store our trained models and their weights
# NOTE(review): this rebinds `models` (previously the dict of CNN models) to a
# list — the earlier CNN references are dropped from this point on.
weights = []
models = []
In establishing a baseline, I decided to use a relatively simple model architecture and let it run for a while. The memory limit is 50,000, while our step limit is 5,000,000. This is a big discrepancy, but I thought it would be an interesting place to start, and at the end of this long training run it should be clear whether the memory limit is sufficient or not.
# Record model 1's hyper-parameters: window length 1, a 128/64/32 MLP,
# 5M training steps, 50k replay memory, logging every 500 steps.
rl['Model 1'] = [1, '128/64/32', 5000000, 50000, 500, None]

# Simple feed-forward network mapping observations to one Q-value per action.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
for width in (128, 64, 32):
    model.add(Dense(width))
    model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
Model: "sequential_37" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_36 (Flatten) (None, 8) 0 _________________________________________________________________ dense_144 (Dense) (None, 128) 1152 _________________________________________________________________ activation_144 (Activation) (None, 128) 0 _________________________________________________________________ dense_145 (Dense) (None, 64) 8256 _________________________________________________________________ activation_145 (Activation) (None, 64) 0 _________________________________________________________________ dense_146 (Dense) (None, 32) 2080 _________________________________________________________________ activation_146 (Activation) (None, 32) 0 _________________________________________________________________ dense_147 (Dense) (None, 4) 132 _________________________________________________________________ activation_147 (Activation) (None, 4) 0 ================================================================= Total params: 11,620 Trainable params: 11,620 Non-trainable params: 0 _________________________________________________________________ None
# Replay buffer; window_length=1 means each sample is a single observation.
memory = SequentialMemory(limit=50000, window_length=1)
# Epsilon-greedy exploration policy.
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=30,
               target_model_update=1e-2, policy=policy)
# `lr` is a deprecated alias in TF2 Keras optimisers; learning_rate is the
# supported keyword with identical behaviour.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
# weights_filename = f'dqn_lunar_weights.h5f'
checkpoint_weights_filename = 'dqn_lunar_weights_{step}.h5f'
log_filename = 'dqn_lunar_log.json'  # f-prefix removed: no placeholders in the literal
callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
callbacks += [FileLogger(log_filename, interval=100)]
start_time = time.time()
# Long run: 5M steps against a 50k-sample replay buffer.
history = dqn.fit(env, callbacks=callbacks, nb_steps=5000000, log_interval=500)
end_time = time.time()
Training for 5000000 steps ... Interval 1 (0 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2968 2 episodes - episode_reward: -64.423 [-106.174, -22.672] - loss: 11.332 - mae: 45.887 - mean_q: 58.191 Interval 2 (500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1022 2 episodes - episode_reward: -3.217 [-199.287, 192.853] - loss: 11.998 - mae: 46.349 - mean_q: 58.936 Interval 3 (1000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1628 1 episodes - episode_reward: -168.263 [-168.263, -168.263] - loss: 12.304 - mae: 46.852 - mean_q: 59.658 Interval 4 (1500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5485 3 episodes - episode_reward: 99.399 [-106.636, 293.945] - loss: 13.118 - mae: 47.224 - mean_q: 59.815 Interval 5 (2000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7257 3 episodes - episode_reward: -148.137 [-193.370, -88.525] - loss: 16.457 - mae: 48.257 - mean_q: 61.103 Interval 6 (2500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2837 2 episodes - episode_reward: 52.696 [-100.000, 205.391] - loss: 13.022 - mae: 48.027 - mean_q: 61.484 Interval 7 (3000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4640 2 episodes - episode_reward: -92.956 [-206.409, 20.498] - loss: 11.433 - mae: 48.568 - mean_q: 62.093 Interval 8 (3500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0498 2 episodes - episode_reward: -21.080 [-45.077, 2.916] - loss: 13.082 - mae: 49.012 - mean_q: 63.046 Interval 9 (4000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4220 1 episodes - episode_reward: 211.878 [211.878, 211.878] - loss: 10.383 - mae: 49.893 - mean_q: 63.800 Interval 10 (4500 steps performed) 500/500 [==============================] - 3s 5ms/step - 
reward: 0.0200 2 episodes - episode_reward: 73.404 [-108.346, 255.153] - loss: 12.138 - mae: 50.321 - mean_q: 64.416 Interval 11 (5000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1708 2 episodes - episode_reward: 35.138 [-96.872, 167.148] - loss: 13.105 - mae: 50.468 - mean_q: 64.957 Interval 12 (5500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6801 1 episodes - episode_reward: 293.951 [293.951, 293.951] - loss: 13.186 - mae: 50.534 - mean_q: 64.715 Interval 13 (6000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0205 1 episodes - episode_reward: -48.235 [-48.235, -48.235] - loss: 12.578 - mae: 51.440 - mean_q: 66.475 Interval 14 (6500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2263 1 episodes - episode_reward: 172.171 [172.171, 172.171] - loss: 14.637 - mae: 51.170 - mean_q: 65.845 Interval 15 (7000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4947 1 episodes - episode_reward: 224.384 [224.384, 224.384] - loss: 12.155 - mae: 51.131 - mean_q: 66.041 Interval 16 (7500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5953 1 episodes - episode_reward: 176.249 [176.249, 176.249] - loss: 13.455 - mae: 51.570 - mean_q: 66.076 Interval 17 (8000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2367 2 episodes - episode_reward: -336.200 [-550.630, -121.769] - loss: 13.814 - mae: 51.849 - mean_q: 67.069 Interval 18 (8500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8438 2 episodes - episode_reward: 264.907 [256.370, 273.443] - loss: 19.953 - mae: 52.101 - mean_q: 66.817 Interval 19 (9000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2767 1 episodes - episode_reward: 248.164 [248.164, 248.164] - loss: 12.619 - mae: 51.897 - mean_q: 
66.987 Interval 20 (9500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2128 1 episodes - episode_reward: -130.925 [-130.925, -130.925] - loss: 15.200 - mae: 52.703 - mean_q: 67.867 Interval 21 (10000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4209 1 episodes - episode_reward: 282.660 [282.660, 282.660] - loss: 15.041 - mae: 52.909 - mean_q: 67.926 Interval 22 (10500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5309 1 episodes - episode_reward: 208.861 [208.861, 208.861] - loss: 14.633 - mae: 52.818 - mean_q: 67.502 Interval 23 (11000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1992 5 episodes - episode_reward: 4.806 [-199.768, 267.824] - loss: 15.000 - mae: 52.544 - mean_q: 66.834 Interval 24 (11500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2144 Interval 25 (12000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2703 3 episodes - episode_reward: -88.463 [-159.549, -5.841] - loss: 16.620 - mae: 52.621 - mean_q: 67.147 Interval 26 (12500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2370 1 episodes - episode_reward: 159.240 [159.240, 159.240] - loss: 14.464 - mae: 52.913 - mean_q: 67.486 Interval 27 (13000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0989 1 episodes - episode_reward: -328.578 [-328.578, -328.578] - loss: 13.894 - mae: 53.133 - mean_q: 67.155 Interval 28 (13500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1693 Interval 29 (14000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0889 Interval 30 (14500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4338 1 episodes - episode_reward: 29.364 [29.364, 29.364] - loss: 13.120 - mae: 
53.151 - mean_q: 66.696 Interval 31 (15000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2695 1 episodes - episode_reward: 227.546 [227.546, 227.546] - loss: 15.218 - mae: 53.789 - mean_q: 67.830 Interval 32 (15500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0058 Interval 33 (16000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1937 Interval 34 (16500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2854 1 episodes - episode_reward: 25.162 [25.162, 25.162] - loss: 14.723 - mae: 54.400 - mean_q: 68.518 Interval 35 (17000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0776 Interval 36 (17500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1860 1 episodes - episode_reward: 205.978 [205.978, 205.978] - loss: 17.133 - mae: 54.040 - mean_q: 68.118 Interval 37 (18000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7898 1 episodes - episode_reward: -362.537 [-362.537, -362.537] - loss: 13.846 - mae: 54.610 - mean_q: 68.729 Interval 38 (18500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3164 1 episodes - episode_reward: 134.453 [134.453, 134.453] - loss: 20.636 - mae: 54.292 - mean_q: 68.305 Interval 39 (19000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4737 1 episodes - episode_reward: 196.644 [196.644, 196.644] - loss: 16.947 - mae: 54.708 - mean_q: 69.374 Interval 40 (19500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1233 1 episodes - episode_reward: -56.701 [-56.701, -56.701] - loss: 16.588 - mae: 55.589 - mean_q: 69.867 Interval 41 (20000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2589 Interval 42 (20500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -0.1060 Interval 43 (21000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1750 Interval 44 (21500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2075 Interval 45 (22000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2146 Interval 46 (22500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1720 Interval 47 (23000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1999 Interval 48 (23500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1903 Interval 49 (24000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0257 1 episodes - episode_reward: -850.114 [-850.114, -850.114] - loss: 16.909 - mae: 52.839 - mean_q: 66.855 Interval 50 (24500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0743 2 episodes - episode_reward: 23.733 [-159.537, 207.003] - loss: 15.484 - mae: 53.073 - mean_q: 67.108 Interval 51 (25000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3081 2 episodes - episode_reward: -34.004 [-260.440, 192.432] - loss: 19.264 - mae: 52.717 - mean_q: 67.172 Interval 52 (25500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0213 Interval 53 (26000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1599 Interval 54 (26500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2088 Interval 55 (27000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1510 Interval 56 (27500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2212 Interval 57 (28000 steps performed) 500/500 [==============================] - 4s 8ms/step - 
reward: -0.1478 Interval 58 (28500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1646 Interval 59 (29000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1918 Interval 60 (29500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1974 Interval 61 (30000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1631 Interval 62 (30500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2288 Interval 63 (31000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1938 Interval 64 (31500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1515 Interval 65 (32000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2069 Interval 66 (32500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1691 Interval 67 (33000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2235 Interval 68 (33500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.4184 Interval 69 (34000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9986 3 episodes - episode_reward: -670.357 [-1705.001, -78.091] - loss: 12.581 - mae: 46.715 - mean_q: 59.339 Interval 70 (34500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3647 3 episodes - episode_reward: -215.116 [-328.531, -129.688] - loss: 11.214 - mae: 46.854 - mean_q: 59.220 Interval 71 (35000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8563 4 episodes - episode_reward: -237.136 [-324.035, -125.454] - loss: 15.123 - mae: 46.182 - mean_q: 58.222 Interval 72 (35500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4301 5 episodes - 
episode_reward: -159.345 [-209.497, -100.000] - loss: 17.991 - mae: 45.957 - mean_q: 57.120 Interval 73 (36000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3521 1 episodes - episode_reward: -258.870 [-258.870, -258.870] - loss: 15.196 - mae: 44.592 - mean_q: 55.005 Interval 74 (36500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4042 5 episodes - episode_reward: -135.465 [-185.873, -95.274] - loss: 12.775 - mae: 44.978 - mean_q: 55.740 Interval 75 (37000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2327 2 episodes - episode_reward: -69.337 [-100.000, -38.675] - loss: 16.108 - mae: 44.568 - mean_q: 55.032 Interval 76 (37500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0237 4 episodes - episode_reward: -135.521 [-180.649, -89.123] - loss: 19.046 - mae: 44.109 - mean_q: 54.136 Interval 77 (38000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9337 2 episodes - episode_reward: -668.401 [-1187.809, -148.993] - loss: 14.510 - mae: 44.010 - mean_q: 52.513 Interval 78 (38500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5004 1 episodes - episode_reward: -284.148 [-284.148, -284.148] - loss: 16.639 - mae: 43.295 - mean_q: 51.772 Interval 79 (39000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2597 1 episodes - episode_reward: -91.022 [-91.022, -91.022] - loss: 9.611 - mae: 42.709 - mean_q: 50.747 Interval 80 (39500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2926 2 episodes - episode_reward: -111.337 [-176.732, -45.943] - loss: 19.268 - mae: 42.068 - mean_q: 49.644 Interval 81 (40000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4137 1 episodes - episode_reward: -1036.012 [-1036.012, -1036.012] - loss: 16.685 - mae: 41.917 - 
mean_q: 48.816 Interval 82 (40500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3153 5 episodes - episode_reward: -153.084 [-220.015, -58.090] - loss: 18.472 - mae: 42.353 - mean_q: 48.737 Interval 83 (41000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1956 3 episodes - episode_reward: -191.747 [-276.362, -142.425] - loss: 17.583 - mae: 42.030 - mean_q: 47.654 Interval 84 (41500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0509 1 episodes - episode_reward: -180.425 [-180.425, -180.425] - loss: 14.735 - mae: 41.642 - mean_q: 48.047 Interval 85 (42000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1873 2 episodes - episode_reward: 97.354 [-111.500, 306.209] - loss: 13.983 - mae: 40.903 - mean_q: 46.189 Interval 86 (42500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6554 2 episodes - episode_reward: -117.489 [-135.751, -99.228] - loss: 17.129 - mae: 40.718 - mean_q: 46.120 Interval 87 (43000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4833 3 episodes - episode_reward: -274.734 [-435.850, -177.408] - loss: 20.544 - mae: 40.303 - mean_q: 45.913 Interval 88 (43500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1626 3 episodes - episode_reward: -364.613 [-421.354, -301.937] - loss: 14.760 - mae: 40.224 - mean_q: 45.353 Interval 89 (44000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1390 4 episodes - episode_reward: -267.891 [-410.954, -64.847] - loss: 17.838 - mae: 40.428 - mean_q: 45.548 Interval 90 (44500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2604 3 episodes - episode_reward: -210.311 [-355.837, -116.043] - loss: 13.518 - mae: 39.550 - mean_q: 44.131 Interval 91 (45000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.5851 1 episodes - episode_reward: -248.718 [-248.718, -248.718] - loss: 15.532 - mae: 39.558 - mean_q: 43.633 Interval 92 (45500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0930 3 episodes - episode_reward: -197.625 [-265.594, -140.535] - loss: 18.980 - mae: 39.564 - mean_q: 43.179 Interval 93 (46000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9594 4 episodes - episode_reward: -227.831 [-307.418, -142.439] - loss: 18.571 - mae: 39.023 - mean_q: 42.151 Interval 94 (46500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5543 2 episodes - episode_reward: -439.596 [-493.779, -385.413] - loss: 22.225 - mae: 38.249 - mean_q: 41.044 Interval 95 (47000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1045 1 episodes - episode_reward: -454.927 [-454.927, -454.927] - loss: 15.778 - mae: 37.959 - mean_q: 39.572 Interval 96 (47500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6416 1 episodes - episode_reward: -300.482 [-300.482, -300.482] - loss: 19.015 - mae: 37.805 - mean_q: 38.990 Interval 97 (48000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6900 2 episodes - episode_reward: -462.497 [-714.026, -210.968] - loss: 17.946 - mae: 37.169 - mean_q: 38.192 Interval 98 (48500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2686 2 episodes - episode_reward: -334.921 [-491.659, -178.183] - loss: 16.062 - mae: 36.419 - mean_q: 36.917 Interval 99 (49000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1572 Interval 100 (49500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5310 2 episodes - episode_reward: -164.059 [-176.401, -151.717] - loss: 14.197 - mae: 36.593 - mean_q: 36.687 
Interval 101 (50000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3494 1 episodes - episode_reward: -127.396 [-127.396, -127.396] - loss: 20.645 - mae: 36.003 - mean_q: 35.772 Interval 102 (50500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2199 1 episodes - episode_reward: -183.046 [-183.046, -183.046] - loss: 22.123 - mae: 35.756 - mean_q: 35.253 Interval 103 (51000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2123 Interval 104 (51500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6497 2 episodes - episode_reward: -436.496 [-685.380, -187.611] - loss: 22.337 - mae: 35.567 - mean_q: 35.531 Interval 105 (52000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2364 Interval 106 (52500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2561 Interval 107 (53000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9214 3 episodes - episode_reward: -156.532 [-464.167, 96.056] - loss: 17.343 - mae: 35.392 - mean_q: 35.408 Interval 108 (53500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4854 1 episodes - episode_reward: -698.798 [-698.798, -698.798] - loss: 15.919 - mae: 36.297 - mean_q: 36.559 Interval 109 (54000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7123 4 episodes - episode_reward: -319.537 [-509.033, -100.000] - loss: 15.691 - mae: 37.483 - mean_q: 37.146 Interval 110 (54500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1447 3 episodes - episode_reward: -231.422 [-394.399, -115.800] - loss: 21.178 - mae: 38.736 - mean_q: 38.564 Interval 111 (55000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6895 1 episodes - episode_reward: -807.950 [-807.950, 
-807.950] - loss: 18.594 - mae: 39.934 - mean_q: 40.670 Interval 112 (55500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6263 Interval 113 (56000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2041 2 episodes - episode_reward: -404.610 [-632.365, -176.855] - loss: 18.096 - mae: 43.811 - mean_q: 43.790 Interval 114 (56500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3210 1 episodes - episode_reward: -1021.276 [-1021.276, -1021.276] - loss: 16.882 - mae: 46.088 - mean_q: 46.424 Interval 115 (57000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2009 1 episodes - episode_reward: -763.049 [-763.049, -763.049] - loss: 18.231 - mae: 49.466 - mean_q: 49.048 Interval 116 (57500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8239 3 episodes - episode_reward: -288.076 [-572.025, -77.390] - loss: 16.193 - mae: 53.417 - mean_q: 53.424 Interval 117 (58000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9368 5 episodes - episode_reward: -215.489 [-465.741, -93.011] - loss: 22.681 - mae: 57.719 - mean_q: 59.165 Interval 118 (58500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9336 2 episodes - episode_reward: -444.522 [-611.415, -277.630] - loss: 21.429 - mae: 63.161 - mean_q: 65.707 Interval 119 (59000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1478 1 episodes - episode_reward: -206.915 [-206.915, -206.915] - loss: 26.741 - mae: 70.544 - mean_q: 74.359 Interval 120 (59500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5252 5 episodes - episode_reward: -256.952 [-391.806, -109.675] - loss: 32.456 - mae: 77.138 - mean_q: 82.394 Interval 121 (60000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2532 
Interval 122 (60500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3461 6 episodes - episode_reward: -201.860 [-329.732, -100.000] - loss: 40.001 - mae: 90.600 - mean_q: 101.006 Interval 123 (61000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3837 4 episodes - episode_reward: -184.507 [-281.341, -107.423] - loss: 27.522 - mae: 95.977 - mean_q: 110.367 Interval 124 (61500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1723 3 episodes - episode_reward: -363.330 [-406.054, -330.573] - loss: 35.533 - mae: 102.482 - mean_q: 116.874 Interval 125 (62000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9495 3 episodes - episode_reward: -170.599 [-287.424, -23.717] - loss: 40.787 - mae: 107.401 - mean_q: 123.162 Interval 126 (62500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8388 1 episodes - episode_reward: -323.435 [-323.435, -323.435] - loss: 29.310 - mae: 109.415 - mean_q: 125.596 Interval 127 (63000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1911 3 episodes - episode_reward: -224.309 [-304.000, -107.186] - loss: 37.634 - mae: 112.041 - mean_q: 129.887 Interval 128 (63500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7418 2 episodes - episode_reward: -421.653 [-648.462, -194.843] - loss: 32.574 - mae: 112.443 - mean_q: 131.408 Interval 129 (64000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9309 2 episodes - episode_reward: -189.176 [-264.904, -113.448] - loss: 33.173 - mae: 113.036 - mean_q: 132.498 Interval 130 (64500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0514 3 episodes - episode_reward: -211.630 [-262.729, -110.945] - loss: 33.705 - mae: 113.910 - mean_q: 134.057 Interval 131 (65000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -0.7180 2 episodes - episode_reward: -174.172 [-242.103, -106.242] - loss: 37.393 - mae: 112.825 - mean_q: 132.585 Interval 132 (65500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7890 2 episodes - episode_reward: -173.593 [-244.826, -102.359] - loss: 55.879 - mae: 111.062 - mean_q: 132.277 Interval 133 (66000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2918 2 episodes - episode_reward: -312.778 [-376.687, -248.870] - loss: 32.883 - mae: 110.209 - mean_q: 131.975 Interval 134 (66500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1944 Interval 135 (67000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2702 1 episodes - episode_reward: -328.331 [-328.331, -328.331] - loss: 49.332 - mae: 109.357 - mean_q: 132.037 Interval 136 (67500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9001 2 episodes - episode_reward: -215.463 [-272.976, -157.949] - loss: 27.635 - mae: 108.890 - mean_q: 134.124 Interval 137 (68000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7609 1 episodes - episode_reward: -284.610 [-284.610, -284.610] - loss: 40.232 - mae: 107.437 - mean_q: 131.593 Interval 138 (68500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2861 2 episodes - episode_reward: -141.371 [-148.840, -133.902] - loss: 30.820 - mae: 107.506 - mean_q: 131.947 Interval 139 (69000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4748 1 episodes - episode_reward: -199.383 [-199.383, -199.383] - loss: 34.697 - mae: 106.772 - mean_q: 131.762 Interval 140 (69500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5817 1 episodes - episode_reward: -270.048 [-270.048, -270.048] - loss: 31.798 - mae: 106.717 
- mean_q: 131.715 Interval 141 (70000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1662 1 episodes - episode_reward: -590.691 [-590.691, -590.691] - loss: 39.206 - mae: 105.804 - mean_q: 130.177 Interval 142 (70500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3458 Interval 143 (71000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9982 3 episodes - episode_reward: -227.748 [-285.771, -197.055] - loss: 34.190 - mae: 106.280 - mean_q: 131.226 Interval 144 (71500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6299 2 episodes - episode_reward: -121.099 [-142.197, -100.000] - loss: 28.925 - mae: 106.712 - mean_q: 132.575 Interval 145 (72000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2494 2 episodes - episode_reward: -198.050 [-224.685, -171.415] - loss: 34.276 - mae: 106.271 - mean_q: 130.798 Interval 146 (72500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2588 1 episodes - episode_reward: -411.014 [-411.014, -411.014] - loss: 34.849 - mae: 105.635 - mean_q: 130.147 Interval 147 (73000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3506 Interval 148 (73500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5862 1 episodes - episode_reward: -437.316 [-437.316, -437.316] - loss: 40.117 - mae: 104.930 - mean_q: 129.126 Interval 149 (74000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2915 1 episodes - episode_reward: -147.621 [-147.621, -147.621] - loss: 35.958 - mae: 104.466 - mean_q: 128.632 Interval 150 (74500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1432 1 episodes - episode_reward: -708.307 [-708.307, -708.307] - loss: 37.383 - mae: 104.607 - mean_q: 129.319 Interval 151 (75000 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8462 2 episodes - episode_reward: -165.032 [-173.778, -156.286] - loss: 43.471 - mae: 103.644 - mean_q: 127.487 Interval 152 (75500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4991 4 episodes - episode_reward: -184.899 [-348.232, -100.000] - loss: 37.148 - mae: 101.694 - mean_q: 124.575 Interval 153 (76000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6737 1 episodes - episode_reward: -367.479 [-367.479, -367.479] - loss: 41.143 - mae: 100.866 - mean_q: 123.466 Interval 154 (76500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0827 Interval 155 (77000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2739 Interval 156 (77500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4446 1 episodes - episode_reward: -290.913 [-290.913, -290.913] - loss: 45.248 - mae: 96.327 - mean_q: 116.345 Interval 157 (78000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5724 1 episodes - episode_reward: -264.533 [-264.533, -264.533] - loss: 31.556 - mae: 96.784 - mean_q: 117.763 Interval 158 (78500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3846 2 episodes - episode_reward: -152.511 [-170.043, -134.980] - loss: 36.275 - mae: 95.517 - mean_q: 114.970 Interval 159 (79000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4724 1 episodes - episode_reward: -127.076 [-127.076, -127.076] - loss: 34.971 - mae: 94.827 - mean_q: 114.024 Interval 160 (79500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1906 1 episodes - episode_reward: -197.779 [-197.779, -197.779] - loss: 35.402 - mae: 93.630 - mean_q: 112.416 Interval 161 (80000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.4896 1 episodes - episode_reward: -142.530 [-142.530, -142.530] - loss: 30.826 - mae: 92.915 - mean_q: 110.904 Interval 162 (80500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1129 Interval 163 (81000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2357 Interval 164 (81500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2809 Interval 165 (82000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2745 Interval 166 (82500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0980 Interval 167 (83000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0450 Interval 168 (83500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.4556 Interval 169 (84000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5332 1 episodes - episode_reward: -1026.603 [-1026.603, -1026.603] - loss: 44.743 - mae: 88.600 - mean_q: 104.619 Interval 170 (84500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2381 Interval 171 (85000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3311 1 episodes - episode_reward: -365.674 [-365.674, -365.674] - loss: 43.915 - mae: 88.502 - mean_q: 105.550 Interval 172 (85500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4196 Interval 173 (86000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0363 Interval 174 (86500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4881 1 episodes - episode_reward: -369.330 [-369.330, -369.330] - loss: 56.352 - mae: 88.190 - mean_q: 103.751 Interval 175 (87000 steps performed) 500/500 [==============================] 
- 2s 5ms/step - reward: -3.5422 5 episodes - episode_reward: -371.645 [-576.820, -195.355] - loss: 169.028 - mae: 90.930 - mean_q: 107.981 Interval 176 (87500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.2459 4 episodes - episode_reward: -513.800 [-738.657, -394.475] - loss: 133.715 - mae: 93.982 - mean_q: 110.953 Interval 177 (88000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.9104 9 episodes - episode_reward: -337.096 [-619.517, -100.000] - loss: 152.570 - mae: 97.684 - mean_q: 115.888 Interval 178 (88500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4805 8 episodes - episode_reward: -304.616 [-488.381, -93.032] - loss: 93.725 - mae: 99.568 - mean_q: 116.246 Interval 179 (89000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.8506 8 episodes - episode_reward: -482.367 [-610.223, -222.639] - loss: 72.437 - mae: 101.141 - mean_q: 116.796 Interval 180 (89500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4471 6 episodes - episode_reward: -406.428 [-597.814, -156.875] - loss: 132.645 - mae: 105.462 - mean_q: 120.120 Interval 181 (90000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6877 6 episodes - episode_reward: -302.850 [-442.282, -128.239] - loss: 162.167 - mae: 108.541 - mean_q: 121.399 Interval 182 (90500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3303 6 episodes - episode_reward: -380.824 [-607.668, -99.753] - loss: 142.426 - mae: 111.367 - mean_q: 125.574 Interval 183 (91000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8343 4 episodes - episode_reward: -371.688 [-606.490, -234.099] - loss: 72.729 - mae: 113.025 - mean_q: 126.466 Interval 184 (91500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6470 6 episodes - 
episode_reward: -129.480 [-207.786, -73.214] - loss: 55.320 - mae: 114.793 - mean_q: 124.952 Interval 185 (92000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0193 5 episodes - episode_reward: -312.133 [-482.306, -102.541] - loss: 88.145 - mae: 118.032 - mean_q: 128.848 Interval 186 (92500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3639 4 episodes - episode_reward: -173.033 [-302.312, -114.244] - loss: 77.981 - mae: 119.412 - mean_q: 130.030 Interval 187 (93000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3605 3 episodes - episode_reward: -154.819 [-192.160, -126.684] - loss: 67.374 - mae: 121.335 - mean_q: 132.682 Interval 188 (93500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5287 3 episodes - episode_reward: -289.927 [-403.987, -173.741] - loss: 84.755 - mae: 122.947 - mean_q: 136.616 Interval 189 (94000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0176 3 episodes - episode_reward: -217.683 [-318.679, -101.417] - loss: 105.791 - mae: 124.623 - mean_q: 138.440 Interval 190 (94500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3936 2 episodes - episode_reward: -322.007 [-463.553, -180.461] - loss: 82.607 - mae: 125.269 - mean_q: 138.972 Interval 191 (95000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5881 3 episodes - episode_reward: -251.567 [-288.728, -189.151] - loss: 111.733 - mae: 127.829 - mean_q: 141.464 Interval 192 (95500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4695 4 episodes - episode_reward: -297.685 [-545.390, -102.156] - loss: 97.240 - mae: 129.551 - mean_q: 144.340 Interval 193 (96000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0848 Interval 194 (96500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.5367 1 episodes - episode_reward: -401.092 [-401.092, -401.092] - loss: 108.512 - mae: 135.165 - mean_q: 149.554 Interval 195 (97000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7915 1 episodes - episode_reward: -260.411 [-260.411, -260.411] - loss: 108.669 - mae: 137.622 - mean_q: 152.820 Interval 196 (97500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3517 1 episodes - episode_reward: -281.306 [-281.306, -281.306] - loss: 114.279 - mae: 140.524 - mean_q: 156.117 Interval 197 (98000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4752 5 episodes - episode_reward: -141.004 [-284.588, -88.459] - loss: 135.281 - mae: 141.873 - mean_q: 159.485 Interval 198 (98500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4096 5 episodes - episode_reward: -247.092 [-323.514, -174.206] - loss: 161.003 - mae: 144.450 - mean_q: 161.077 Interval 199 (99000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6607 2 episodes - episode_reward: -304.022 [-393.213, -214.830] - loss: 150.088 - mae: 147.889 - mean_q: 165.100 Interval 200 (99500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9261 2 episodes - episode_reward: -459.087 [-472.229, -445.944] - loss: 160.592 - mae: 147.478 - mean_q: 164.581 Interval 201 (100000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1931 4 episodes - episode_reward: -323.749 [-484.238, -79.387] - loss: 127.285 - mae: 149.970 - mean_q: 167.005 Interval 202 (100500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1855 3 episodes - episode_reward: -309.703 [-385.512, -208.604] - loss: 141.007 - mae: 151.232 - mean_q: 170.683 Interval 203 (101000 steps performed) 500/500 [==============================] - 
2s 4ms/step - reward: -1.6526 2 episodes - episode_reward: -440.724 [-510.281, -371.167] - loss: 206.774 - mae: 151.575 - mean_q: 171.346 Interval 204 (101500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1205 5 episodes - episode_reward: -252.100 [-413.736, -100.000] - loss: 142.267 - mae: 151.508 - mean_q: 170.240 Interval 205 (102000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9218 3 episodes - episode_reward: -406.079 [-736.856, -119.212] - loss: 162.966 - mae: 154.275 - mean_q: 174.738 Interval 206 (102500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8842 5 episodes - episode_reward: -301.719 [-419.213, -178.025] - loss: 182.420 - mae: 153.948 - mean_q: 174.010 Interval 207 (103000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4965 3 episodes - episode_reward: -431.286 [-448.775, -405.647] - loss: 112.719 - mae: 150.314 - mean_q: 172.134 Interval 208 (103500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3069 4 episodes - episode_reward: -296.819 [-502.917, -104.437] - loss: 171.087 - mae: 149.135 - mean_q: 171.064 Interval 209 (104000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2545 5 episodes - episode_reward: -219.874 [-390.798, -100.000] - loss: 141.662 - mae: 148.497 - mean_q: 173.088 Interval 210 (104500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4060 3 episodes - episode_reward: -352.449 [-381.000, -308.137] - loss: 115.093 - mae: 147.777 - mean_q: 172.608 Interval 211 (105000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4446 4 episodes - episode_reward: -366.302 [-493.009, -100.000] - loss: 83.518 - mae: 146.695 - mean_q: 171.975 Interval 212 (105500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2802 
1 episodes - episode_reward: -471.627 [-471.627, -471.627] - loss: 144.195 - mae: 149.399 - mean_q: 176.813 Interval 213 (106000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2900 2 episodes - episode_reward: -327.116 [-412.860, -241.371] - loss: 184.369 - mae: 150.630 - mean_q: 177.929 Interval 214 (106500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9365 2 episodes - episode_reward: -323.056 [-329.683, -316.430] - loss: 204.548 - mae: 152.339 - mean_q: 179.315 Interval 215 (107000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4472 2 episodes - episode_reward: -260.166 [-349.261, -171.072] - loss: 133.863 - mae: 150.929 - mean_q: 177.538 Interval 216 (107500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1656 5 episodes - episode_reward: -318.015 [-500.876, -120.085] - loss: 178.204 - mae: 150.884 - mean_q: 177.931 Interval 217 (108000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2334 1 episodes - episode_reward: -631.935 [-631.935, -631.935] - loss: 203.389 - mae: 153.499 - mean_q: 178.785 Interval 218 (108500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9967 2 episodes - episode_reward: -309.208 [-326.093, -292.322] - loss: 139.735 - mae: 155.866 - mean_q: 182.808 Interval 219 (109000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9324 2 episodes - episode_reward: -153.861 [-275.548, -32.174] - loss: 163.772 - mae: 157.413 - mean_q: 185.670 Interval 220 (109500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7726 2 episodes - episode_reward: -303.513 [-471.106, -135.921] - loss: 160.507 - mae: 159.198 - mean_q: 186.973 Interval 221 (110000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8223 1 episodes - episode_reward: 
-297.666 [-297.666, -297.666] - loss: 183.763 - mae: 161.178 - mean_q: 190.225 Interval 222 (110500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8114 2 episodes - episode_reward: -278.150 [-319.707, -236.592] - loss: 138.043 - mae: 161.421 - mean_q: 189.183 Interval 223 (111000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2672 2 episodes - episode_reward: -150.118 [-198.002, -102.233] - loss: 111.476 - mae: 162.921 - mean_q: 192.087 Interval 224 (111500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4500 1 episodes - episode_reward: -528.847 [-528.847, -528.847] - loss: 139.757 - mae: 165.113 - mean_q: 195.178 Interval 225 (112000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7510 Interval 226 (112500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0055 2 episodes - episode_reward: -446.524 [-729.558, -163.489] - loss: 123.895 - mae: 167.045 - mean_q: 197.212 Interval 227 (113000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6351 Interval 228 (113500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5863 2 episodes - episode_reward: -270.444 [-493.672, -47.216] - loss: 128.659 - mae: 166.637 - mean_q: 196.317 Interval 229 (114000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0574 1 episodes - episode_reward: -291.269 [-291.269, -291.269] - loss: 145.328 - mae: 165.182 - mean_q: 195.325 Interval 230 (114500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6826 1 episodes - episode_reward: -369.958 [-369.958, -369.958] - loss: 139.796 - mae: 164.195 - mean_q: 196.418 Interval 231 (115000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7153 2 episodes - episode_reward: -555.210 [-709.235, 
-401.185] - loss: 153.181 - mae: 163.595 - mean_q: 197.017 Interval 232 (115500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6575 Interval 233 (116000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4410 1 episodes - episode_reward: -473.495 [-473.495, -473.495] - loss: 106.381 - mae: 158.399 - mean_q: 189.358 Interval 234 (116500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1659 Interval 235 (117000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1505 1 episodes - episode_reward: -494.920 [-494.920, -494.920] - loss: 109.737 - mae: 154.748 - mean_q: 185.400 Interval 236 (117500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2762 Interval 237 (118000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8279 1 episodes - episode_reward: -477.269 [-477.269, -477.269] - loss: 111.857 - mae: 151.336 - mean_q: 180.357 Interval 238 (118500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5305 2 episodes - episode_reward: -366.866 [-436.516, -297.216] - loss: 105.815 - mae: 148.396 - mean_q: 176.877 Interval 239 (119000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9768 2 episodes - episode_reward: -360.333 [-404.332, -316.334] - loss: 98.432 - mae: 145.230 - mean_q: 171.287 Interval 240 (119500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3814 2 episodes - episode_reward: -496.413 [-615.723, -377.103] - loss: 107.053 - mae: 143.512 - mean_q: 168.589 Interval 241 (120000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2122 Interval 242 (120500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8863 1 episodes - episode_reward: -648.331 [-648.331, -648.331] - loss: 81.325 
- mae: 141.275 - mean_q: 164.660 Interval 243 (121000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7355 3 episodes - episode_reward: -261.439 [-470.694, -121.579] - loss: 90.079 - mae: 139.638 - mean_q: 162.163 Interval 244 (121500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4888 3 episodes - episode_reward: -247.632 [-427.204, -133.417] - loss: 87.291 - mae: 138.149 - mean_q: 160.497 Interval 245 (122000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0150 2 episodes - episode_reward: -268.618 [-273.382, -263.854] - loss: 98.878 - mae: 135.525 - mean_q: 155.654 Interval 246 (122500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1042 5 episodes - episode_reward: -331.861 [-430.402, -255.917] - loss: 82.383 - mae: 135.884 - mean_q: 156.096 Interval 247 (123000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2795 2 episodes - episode_reward: -290.143 [-330.189, -250.097] - loss: 91.594 - mae: 136.304 - mean_q: 154.968 Interval 248 (123500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4809 2 episodes - episode_reward: -331.829 [-448.916, -214.742] - loss: 95.762 - mae: 135.741 - mean_q: 153.297 Interval 249 (124000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3578 3 episodes - episode_reward: -223.240 [-282.889, -156.494] - loss: 94.941 - mae: 132.870 - mean_q: 149.904 Interval 250 (124500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6495 3 episodes - episode_reward: -307.768 [-311.404, -302.452] - loss: 85.000 - mae: 132.268 - mean_q: 148.684 Interval 251 (125000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9738 3 episodes - episode_reward: -198.793 [-214.659, -182.615] - loss: 90.758 - mae: 130.399 - mean_q: 145.658 
Interval 252 (125500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0351 1 episodes - episode_reward: 167.059 [167.059, 167.059] - loss: 84.118 - mae: 130.698 - mean_q: 145.159 Interval 253 (126000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9186 2 episodes - episode_reward: -258.088 [-286.539, -229.637] - loss: 88.713 - mae: 130.293 - mean_q: 145.203 Interval 254 (126500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7747 3 episodes - episode_reward: -150.418 [-258.683, 30.050] - loss: 85.060 - mae: 132.506 - mean_q: 148.287 Interval 255 (127000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9497 2 episodes - episode_reward: -171.113 [-292.491, -49.735] - loss: 103.308 - mae: 130.815 - mean_q: 143.012 Interval 256 (127500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6343 Interval 257 (128000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2873 Interval 258 (128500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1909 3 episodes - episode_reward: -294.272 [-392.955, -235.869] - loss: 89.187 - mae: 122.736 - mean_q: 128.599 Interval 259 (129000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0004 1 episodes - episode_reward: -328.188 [-328.188, -328.188] - loss: 85.993 - mae: 121.510 - mean_q: 126.193 Interval 260 (129500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6770 4 episodes - episode_reward: -277.110 [-394.092, -157.843] - loss: 90.058 - mae: 118.647 - mean_q: 122.929 Interval 261 (130000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4095 2 episodes - episode_reward: -272.516 [-415.460, -129.572] - loss: 73.366 - mae: 115.615 - mean_q: 117.163 Interval 262 (130500 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -2.0144 4 episodes - episode_reward: -265.187 [-459.032, -100.000] - loss: 88.089 - mae: 112.980 - mean_q: 112.024 Interval 263 (131000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8304 2 episodes - episode_reward: -218.296 [-235.012, -201.581] - loss: 86.201 - mae: 111.248 - mean_q: 108.292 Interval 264 (131500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5498 3 episodes - episode_reward: -253.538 [-303.675, -218.247] - loss: 72.855 - mae: 106.195 - mean_q: 100.381 Interval 265 (132000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0655 2 episodes - episode_reward: -293.978 [-336.867, -251.088] - loss: 82.357 - mae: 104.129 - mean_q: 96.974 Interval 266 (132500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0353 2 episodes - episode_reward: -214.360 [-215.118, -213.601] - loss: 99.333 - mae: 101.073 - mean_q: 92.807 Interval 267 (133000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8259 3 episodes - episode_reward: -304.105 [-488.609, -208.763] - loss: 64.974 - mae: 98.454 - mean_q: 89.040 Interval 268 (133500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9645 2 episodes - episode_reward: -245.626 [-345.603, -145.649] - loss: 97.399 - mae: 97.631 - mean_q: 86.481 Interval 269 (134000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6874 1 episodes - episode_reward: -232.611 [-232.611, -232.611] - loss: 96.272 - mae: 96.563 - mean_q: 86.033 Interval 270 (134500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9589 2 episodes - episode_reward: -259.311 [-347.424, -171.198] - loss: 97.572 - mae: 95.858 - mean_q: 84.934 Interval 271 (135000 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: -0.5085 1 episodes - episode_reward: -248.963 [-248.963, -248.963] - loss: 72.386 - mae: 95.000 - mean_q: 82.684 Interval 272 (135500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5224 Interval 273 (136000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9197 2 episodes - episode_reward: -364.823 [-489.942, -239.703] - loss: 91.894 - mae: 92.728 - mean_q: 78.360 Interval 274 (136500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2865 Interval 275 (137000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4187 Interval 276 (137500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6111 Interval 277 (138000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9916 1 episodes - episode_reward: -1088.093 [-1088.093, -1088.093] - loss: 82.472 - mae: 90.103 - mean_q: 77.971 Interval 278 (138500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5552 Interval 279 (139000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4572 2 episodes - episode_reward: -521.309 [-775.922, -266.695] - loss: 86.933 - mae: 88.623 - mean_q: 79.743 Interval 280 (139500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5689 Interval 281 (140000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5892 1 episodes - episode_reward: -1108.616 [-1108.616, -1108.616] - loss: 101.278 - mae: 86.750 - mean_q: 80.910 Interval 282 (140500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0212 2 episodes - episode_reward: -287.076 [-300.611, -273.542] - loss: 117.030 - mae: 88.176 - mean_q: 85.221 Interval 283 (141000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7288 
Interval 284 (141500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4216 1 episodes - episode_reward: -554.810 [-554.810, -554.810] - loss: 109.367 - mae: 90.613 - mean_q: 96.556 Interval 285 (142000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2913 1 episodes - episode_reward: -227.571 [-227.571, -227.571] - loss: 147.752 - mae: 92.901 - mean_q: 101.916 Interval 286 (142500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1796 1 episodes - episode_reward: -870.169 [-870.169, -870.169] - loss: 118.706 - mae: 96.979 - mean_q: 109.402 Interval 287 (143000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7483 4 episodes - episode_reward: -271.376 [-348.572, -100.000] - loss: 104.614 - mae: 99.034 - mean_q: 113.189 Interval 288 (143500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2817 5 episodes - episode_reward: -327.746 [-628.818, -192.906] - loss: 116.721 - mae: 103.045 - mean_q: 118.912 Interval 289 (144000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0658 7 episodes - episode_reward: -147.369 [-264.480, 21.912] - loss: 151.667 - mae: 103.783 - mean_q: 120.962 Interval 290 (144500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2591 2 episodes - episode_reward: -112.688 [-117.445, -107.931] - loss: 462.681 - mae: 107.541 - mean_q: 125.406 Interval 291 (145000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7842 6 episodes - episode_reward: -180.428 [-536.180, -61.734] - loss: 137.207 - mae: 110.167 - mean_q: 130.129 Interval 292 (145500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5055 4 episodes - episode_reward: -257.752 [-435.230, -69.492] - loss: 210.938 - mae: 114.911 - mean_q: 137.894 Interval 293 (146000 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: 0.4206 2 episodes - episode_reward: 94.691 [33.133, 156.250] - loss: 371.358 - mae: 120.918 - mean_q: 147.602 Interval 294 (146500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4847 1 episodes - episode_reward: -161.015 [-161.015, -161.015] - loss: 292.547 - mae: 129.795 - mean_q: 160.747 Interval 295 (147000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5122 3 episodes - episode_reward: -432.208 [-660.198, -180.329] - loss: 577.041 - mae: 134.913 - mean_q: 169.158 Interval 296 (147500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7909 2 episodes - episode_reward: -174.093 [-190.055, -158.131] - loss: 402.093 - mae: 138.685 - mean_q: 174.621 Interval 297 (148000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2867 3 episodes - episode_reward: -216.263 [-309.058, -94.230] - loss: 285.291 - mae: 143.291 - mean_q: 181.104 Interval 298 (148500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4464 2 episodes - episode_reward: -106.657 [-124.041, -89.274] - loss: 330.240 - mae: 147.979 - mean_q: 187.253 Interval 299 (149000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4607 4 episodes - episode_reward: -306.296 [-642.505, -128.180] - loss: 590.387 - mae: 157.485 - mean_q: 200.068 Interval 300 (149500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6424 2 episodes - episode_reward: -115.197 [-120.445, -109.950] - loss: 308.778 - mae: 162.949 - mean_q: 206.897 Interval 301 (150000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3705 5 episodes - episode_reward: -423.733 [-637.801, -126.153] - loss: 327.740 - mae: 166.429 - mean_q: 210.576 Interval 302 (150500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0989 4 episodes - episode_reward: -420.843 [-492.807, -286.030] - loss: 282.712 - mae: 172.302 - mean_q: 216.538 Interval 303 (151000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2455 2 episodes - episode_reward: -583.764 [-674.765, -492.762] - loss: 298.105 - mae: 182.462 - mean_q: 230.691 Interval 304 (151500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8687 6 episodes - episode_reward: -403.371 [-692.341, -107.864] - loss: 282.459 - mae: 187.223 - mean_q: 235.549 Interval 305 (152000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0400 2 episodes - episode_reward: -467.820 [-577.511, -358.129] - loss: 261.866 - mae: 194.672 - mean_q: 245.416 Interval 306 (152500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9134 5 episodes - episode_reward: -307.432 [-569.823, -106.014] - loss: 340.109 - mae: 205.363 - mean_q: 259.410 Interval 307 (153000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5860 6 episodes - episode_reward: -387.530 [-719.088, -100.000] - loss: 384.556 - mae: 215.996 - mean_q: 273.224 Interval 308 (153500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5941 3 episodes - episode_reward: -419.894 [-743.563, -167.722] - loss: 345.006 - mae: 219.251 - mean_q: 275.939 Interval 309 (154000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9201 3 episodes - episode_reward: -132.655 [-161.354, -99.498] - loss: 283.651 - mae: 227.796 - mean_q: 285.783 Interval 310 (154500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0567 4 episodes - episode_reward: -359.171 [-435.163, -203.304] - loss: 382.017 - mae: 235.167 - mean_q: 295.064 Interval 311 (155000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1798 3 episodes - episode_reward: -430.970 [-799.267, -228.008] - loss: 292.912 - mae: 232.421 - mean_q: 288.503 Interval 312 (155500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4851 2 episodes - episode_reward: -337.281 [-366.391, -308.171] - loss: 269.054 - mae: 235.400 - mean_q: 293.014 Interval 313 (156000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4240 5 episodes - episode_reward: -246.595 [-536.689, -57.661] - loss: 296.882 - mae: 235.840 - mean_q: 292.802 Interval 314 (156500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3708 2 episodes - episode_reward: -344.674 [-564.434, -124.915] - loss: 284.581 - mae: 236.966 - mean_q: 293.859 Interval 315 (157000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3476 3 episodes - episode_reward: -532.308 [-783.256, -354.229] - loss: 301.683 - mae: 234.977 - mean_q: 289.090 Interval 316 (157500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0719 3 episodes - episode_reward: -865.571 [-1461.264, -567.639] - loss: 247.131 - mae: 239.120 - mean_q: 294.893 Interval 317 (158000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4839 4 episodes - episode_reward: -237.203 [-439.320, -104.193] - loss: 238.510 - mae: 231.117 - mean_q: 285.364 Interval 318 (158500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5229 3 episodes - episode_reward: -443.783 [-750.848, -228.646] - loss: 270.227 - mae: 224.818 - mean_q: 279.816 Interval 319 (159000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5897 4 episodes - episode_reward: -387.226 [-575.155, -145.708] - loss: 274.171 - mae: 220.896 - mean_q: 275.049 Interval 320 (159500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0020 4 episodes - episode_reward: -372.718 [-551.877, -173.817] - loss: 222.850 - mae: 219.569 - mean_q: 272.502 Interval 321 (160000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5374 1 episodes - episode_reward: -1131.010 [-1131.010, -1131.010] - loss: 269.176 - mae: 216.692 - mean_q: 267.746 Interval 322 (160500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4271 4 episodes - episode_reward: -349.897 [-553.167, -188.754] - loss: 236.768 - mae: 220.375 - mean_q: 272.929 Interval 323 (161000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6761 2 episodes - episode_reward: -525.861 [-539.028, -512.694] - loss: 345.707 - mae: 219.528 - mean_q: 271.942 Interval 324 (161500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6640 Interval 325 (162000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1326 3 episodes - episode_reward: -685.505 [-1227.420, -261.328] - loss: 245.999 - mae: 218.391 - mean_q: 272.971 Interval 326 (162500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3301 2 episodes - episode_reward: -310.157 [-413.317, -206.997] - loss: 262.912 - mae: 220.817 - mean_q: 279.269 Interval 327 (163000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0565 4 episodes - episode_reward: -279.083 [-451.052, -111.286] - loss: 315.810 - mae: 223.854 - mean_q: 282.112 Interval 328 (163500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7227 1 episodes - episode_reward: -341.633 [-341.633, -341.633] - loss: 263.073 - mae: 229.409 - mean_q: 289.035 Interval 329 (164000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2149 Interval 330 (164500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.7280 1 episodes - episode_reward: -522.414 [-522.414, -522.414] - loss: 351.494 - mae: 235.908 - mean_q: 295.067 Interval 331 (165000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1390 2 episodes - episode_reward: -268.625 [-418.138, -119.111] - loss: 278.474 - mae: 237.658 - mean_q: 298.813 Interval 332 (165500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1692 Interval 333 (166000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6393 1 episodes - episode_reward: -447.622 [-447.622, -447.622] - loss: 250.539 - mae: 250.673 - mean_q: 315.745 Interval 334 (166500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5420 2 episodes - episode_reward: -114.457 [-166.863, -62.051] - loss: 340.387 - mae: 261.253 - mean_q: 328.641 Interval 335 (167000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6263 2 episodes - episode_reward: -143.025 [-171.403, -114.647] - loss: 321.977 - mae: 266.366 - mean_q: 334.750 Interval 336 (167500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9407 2 episodes - episode_reward: -268.246 [-416.674, -119.817] - loss: 335.401 - mae: 268.225 - mean_q: 336.941 Interval 337 (168000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4079 1 episodes - episode_reward: -247.784 [-247.784, -247.784] - loss: 337.432 - mae: 272.986 - mean_q: 344.718 Interval 338 (168500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1134 Interval 339 (169000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0774 Interval 340 (169500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0457 Interval 341 (170000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -0.1958 Interval 342 (170500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0890 Interval 343 (171000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.4501 2 episodes - episode_reward: -228.145 [-301.298, -154.991] - loss: 252.977 - mae: 295.508 - mean_q: 373.929 Interval 344 (171500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0428 Interval 345 (172000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0298 Interval 346 (172500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1358 Interval 347 (173000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2282 1 episodes - episode_reward: 1.328 [1.328, 1.328] - loss: 335.867 - mae: 306.868 - mean_q: 387.120 Interval 348 (173500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3183 Interval 349 (174000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0068 Interval 350 (174500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0994 Interval 351 (175000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1160 Interval 352 (175500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1305 Interval 353 (176000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1219 Interval 354 (176500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1825 Interval 355 (177000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1507 Interval 356 (177500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2419 Interval 357 (178000 steps performed) 500/500 
[==============================] - 5s 10ms/step - reward: -0.0848 Interval 358 (178500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1227 Interval 359 (179000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1485 Interval 360 (179500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0895 Interval 361 (180000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1662 Interval 362 (180500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.4082 2 episodes - episode_reward: -588.205 [-886.500, -289.909] - loss: 251.787 - mae: 321.647 - mean_q: 405.866 Interval 363 (181000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0476 Interval 364 (181500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5128 1 episodes - episode_reward: -134.333 [-134.333, -134.333] - loss: 245.777 - mae: 319.148 - mean_q: 402.297 Interval 365 (182000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2210 Interval 366 (182500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3650 1 episodes - episode_reward: -168.224 [-168.224, -168.224] - loss: 226.439 - mae: 318.786 - mean_q: 402.131 Interval 367 (183000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4909 2 episodes - episode_reward: -157.844 [-186.111, -129.577] - loss: 243.337 - mae: 319.982 - mean_q: 403.510 Interval 368 (183500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1368 Interval 369 (184000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2505 1 episodes - episode_reward: -134.216 [-134.216, -134.216] - loss: 235.846 - mae: 319.336 - mean_q: 402.538 Interval 370 (184500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.3956 3 episodes - episode_reward: -201.656 [-300.414, -141.949] - loss: 251.174 - mae: 321.102 - mean_q: 404.080 Interval 371 (185000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1841 Interval 372 (185500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0267 Interval 373 (186000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2101 Interval 374 (186500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2711 2 episodes - episode_reward: -365.724 [-474.627, -256.822] - loss: 244.473 - mae: 321.699 - mean_q: 406.896 Interval 375 (187000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2783 2 episodes - episode_reward: -332.757 [-552.733, -112.780] - loss: 268.084 - mae: 316.486 - mean_q: 399.739 Interval 376 (187500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2997 Interval 377 (188000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9605 2 episodes - episode_reward: -780.935 [-951.247, -610.624] - loss: 296.763 - mae: 312.820 - mean_q: 397.991 Interval 378 (188500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3072 Interval 379 (189000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9068 1 episodes - episode_reward: -1364.537 [-1364.537, -1364.537] - loss: 391.909 - mae: 305.668 - mean_q: 389.012 Interval 380 (189500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2047 Interval 381 (190000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6477 Interval 382 (190500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8278 2 episodes - episode_reward: -1502.066 [-2891.674, -112.459] 
- loss: 326.300 - mae: 309.061 - mean_q: 395.207 Interval 383 (191000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5402 Interval 384 (191500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5688 1 episodes - episode_reward: -1928.558 [-1928.558, -1928.558] - loss: 332.212 - mae: 306.624 - mean_q: 392.737 Interval 385 (192000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1502 Interval 386 (192500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3317 Interval 387 (193000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2244 Interval 388 (193500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3219 Interval 389 (194000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7176 Interval 390 (194500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0578 1 episodes - episode_reward: -1643.803 [-1643.803, -1643.803] - loss: 330.396 - mae: 353.043 - mean_q: 455.185 Interval 391 (195000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0710 Interval 392 (195500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3176 Interval 393 (196000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4789 3 episodes - episode_reward: -137.464 [-275.229, 47.483] - loss: 554.012 - mae: 380.622 - mean_q: 494.067 Interval 394 (196500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3799 Interval 395 (197000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1069 Interval 396 (197500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7475 Interval 397 (198000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -1.3008 1 episodes - episode_reward: -798.010 [-798.010, -798.010] - loss: 492.269 - mae: 419.581 - mean_q: 546.771 Interval 398 (198500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6017 2 episodes - episode_reward: -360.696 [-604.040, -117.351] - loss: 491.659 - mae: 416.938 - mean_q: 541.928 Interval 399 (199000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2554 Interval 400 (199500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9866 2 episodes - episode_reward: -584.251 [-875.404, -293.099] - loss: 720.246 - mae: 423.507 - mean_q: 550.222 Interval 401 (200000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7849 1 episodes - episode_reward: -395.057 [-395.057, -395.057] - loss: 349.531 - mae: 428.425 - mean_q: 558.254 Interval 402 (200500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2641 2 episodes - episode_reward: -315.521 [-401.629, -229.412] - loss: 366.308 - mae: 432.111 - mean_q: 563.196 Interval 403 (201000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8528 1 episodes - episode_reward: -135.271 [-135.271, -135.271] - loss: 355.470 - mae: 434.489 - mean_q: 565.944 Interval 404 (201500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6820 3 episodes - episode_reward: -375.884 [-481.015, -261.768] - loss: 474.040 - mae: 433.536 - mean_q: 567.503 Interval 405 (202000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6000 Interval 406 (202500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4633 Interval 407 (203000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4085 1 episodes - episode_reward: -2275.038 [-2275.038, -2275.038] - 
loss: 508.077 - mae: 438.974 - mean_q: 576.928 Interval 408 (203500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3702 1 episodes - episode_reward: -550.178 [-550.178, -550.178] - loss: 390.869 - mae: 439.798 - mean_q: 579.914 Interval 409 (204000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1407 Interval 410 (204500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9411 3 episodes - episode_reward: -871.594 [-1852.803, -187.499] - loss: 347.626 - mae: 434.804 - mean_q: 572.494 Interval 411 (205000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9383 2 episodes - episode_reward: -488.299 [-743.595, -233.004] - loss: 459.818 - mae: 432.384 - mean_q: 568.937 Interval 412 (205500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1401 Interval 413 (206000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0224 Interval 414 (206500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9967 1 episodes - episode_reward: -1891.656 [-1891.656, -1891.656] - loss: 281.033 - mae: 425.217 - mean_q: 558.586 Interval 415 (207000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8979 1 episodes - episode_reward: -390.790 [-390.790, -390.790] - loss: 350.617 - mae: 421.589 - mean_q: 554.947 Interval 416 (207500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6534 2 episodes - episode_reward: -528.842 [-742.781, -314.902] - loss: 324.035 - mae: 414.508 - mean_q: 547.124 Interval 417 (208000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1169 Interval 418 (208500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4174 Interval 419 (209000 steps performed) 500/500 [==============================] 
- 3s 6ms/step - reward: -0.4763 Interval 420 (209500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0385 2 episodes - episode_reward: -496.523 [-893.045, -100.000] - loss: 235.672 - mae: 401.957 - mean_q: 533.037 Interval 421 (210000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0743 1 episodes - episode_reward: -574.193 [-574.193, -574.193] - loss: 338.872 - mae: 391.442 - mean_q: 517.557 Interval 422 (210500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5945 1 episodes - episode_reward: -285.218 [-285.218, -285.218] - loss: 223.750 - mae: 383.705 - mean_q: 508.244 Interval 423 (211000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9537 Interval 424 (211500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5491 Interval 425 (212000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9627 1 episodes - episode_reward: -968.532 [-968.532, -968.532] - loss: 195.122 - mae: 358.267 - mean_q: 473.056 Interval 426 (212500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4993 Interval 427 (213000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9951 1 episodes - episode_reward: -1066.165 [-1066.165, -1066.165] - loss: 354.449 - mae: 344.063 - mean_q: 454.256 Interval 428 (213500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0384 Interval 429 (214000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8401 1 episodes - episode_reward: -862.163 [-862.163, -862.163] - loss: 345.763 - mae: 325.303 - mean_q: 429.020 Interval 430 (214500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9701 Interval 431 (215000 steps performed) 500/500 [==============================] - 3s 5ms/step - 
reward: -3.5804 2 episodes - episode_reward: -1411.309 [-2687.965, -134.654] - loss: 277.987 - mae: 314.623 - mean_q: 414.167 Interval 432 (215500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0787 Interval 433 (216000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5787 Interval 434 (216500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6255 3 episodes - episode_reward: -960.437 [-2405.844, -184.932] - loss: 96.293 - mae: 297.367 - mean_q: 391.174 Interval 435 (217000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5950 1 episodes - episode_reward: -1023.556 [-1023.556, -1023.556] - loss: 431.208 - mae: 289.860 - mean_q: 380.664 Interval 436 (217500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7576 3 episodes - episode_reward: -810.561 [-1879.858, -221.274] - loss: 141.890 - mae: 284.607 - mean_q: 373.299 Interval 437 (218000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1901 5 episodes - episode_reward: -316.772 [-483.863, -177.709] - loss: 277.009 - mae: 271.471 - mean_q: 355.281 Interval 438 (218500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1391 Interval 439 (219000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3945 4 episodes - episode_reward: -315.144 [-569.998, -146.490] - loss: 148.345 - mae: 253.709 - mean_q: 329.427 Interval 440 (219500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1681 1 episodes - episode_reward: -1088.533 [-1088.533, -1088.533] - loss: 315.057 - mae: 246.431 - mean_q: 318.073 Interval 441 (220000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1643 Interval 442 (220500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 
-2.0995 Interval 443 (221000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1870 1 episodes - episode_reward: -2442.537 [-2442.537, -2442.537] - loss: 176.612 - mae: 225.570 - mean_q: 290.360 Interval 444 (221500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0052 3 episodes - episode_reward: -1029.830 [-2625.452, -159.314] - loss: 115.392 - mae: 222.086 - mean_q: 285.786 Interval 445 (222000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1834 1 episodes - episode_reward: -370.306 [-370.306, -370.306] - loss: 94.519 - mae: 214.533 - mean_q: 275.270 Interval 446 (222500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4506 2 episodes - episode_reward: -533.022 [-552.775, -513.269] - loss: 188.477 - mae: 208.494 - mean_q: 266.267 Interval 447 (223000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2119 3 episodes - episode_reward: -387.079 [-576.152, -291.311] - loss: 134.928 - mae: 201.247 - mean_q: 256.707 Interval 448 (223500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5379 4 episodes - episode_reward: -271.310 [-438.973, -109.843] - loss: 89.736 - mae: 195.500 - mean_q: 247.784 Interval 449 (224000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4428 4 episodes - episode_reward: -364.003 [-682.327, -174.873] - loss: 84.882 - mae: 189.434 - mean_q: 238.436 Interval 450 (224500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3002 Interval 451 (225000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6657 1 episodes - episode_reward: -433.325 [-433.325, -433.325] - loss: 224.614 - mae: 179.095 - mean_q: 224.543 Interval 452 (225500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5302 Interval 453 
(226000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4856 7 episodes - episode_reward: -219.846 [-486.509, -107.396] - loss: 84.231 - mae: 171.947 - mean_q: 214.334 Interval 454 (226500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2088 1 episodes - episode_reward: -128.508 [-128.508, -128.508] - loss: 184.828 - mae: 167.855 - mean_q: 207.298 Interval 455 (227000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5405 1 episodes - episode_reward: -169.400 [-169.400, -169.400] - loss: 88.664 - mae: 164.987 - mean_q: 203.544 Interval 456 (227500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4440 1 episodes - episode_reward: -302.993 [-302.993, -302.993] - loss: 112.795 - mae: 163.185 - mean_q: 199.837 Interval 457 (228000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5179 Interval 458 (228500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1561 Interval 459 (229000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3326 Interval 460 (229500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0655 Interval 461 (230000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2798 3 episodes - episode_reward: -324.299 [-604.326, -163.382] - loss: 88.116 - mae: 153.287 - mean_q: 188.629 Interval 462 (230500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3821 Interval 463 (231000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3301 Interval 464 (231500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0117 Interval 465 (232000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2182 1 episodes - episode_reward: 
-1533.195 [-1533.195, -1533.195] - loss: 68.422 - mae: 149.177 - mean_q: 182.745 Interval 466 (232500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7231 Interval 467 (233000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9930 3 episodes - episode_reward: -252.660 [-482.614, -126.678] - loss: 93.555 - mae: 145.480 - mean_q: 178.468 Interval 468 (233500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1695 3 episodes - episode_reward: -225.133 [-464.277, -100.000] - loss: 65.952 - mae: 145.460 - mean_q: 178.998 Interval 469 (234000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5070 Interval 470 (234500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2185 4 episodes - episode_reward: -199.783 [-613.035, 2.618] - loss: 66.807 - mae: 138.070 - mean_q: 169.527 Interval 471 (235000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1911 Interval 472 (235500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7037 2 episodes - episode_reward: -961.712 [-1777.873, -145.551] - loss: 103.828 - mae: 132.375 - mean_q: 160.936 Interval 473 (236000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0233 2 episodes - episode_reward: -295.837 [-491.673, -100.000] - loss: 53.799 - mae: 128.804 - mean_q: 157.401 Interval 474 (236500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2785 4 episodes - episode_reward: -168.614 [-322.443, -84.730] - loss: 61.546 - mae: 125.166 - mean_q: 150.960 Interval 475 (237000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2962 6 episodes - episode_reward: -182.796 [-276.145, -100.000] - loss: 60.388 - mae: 123.658 - mean_q: 147.892 Interval 476 (237500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1765 4 episodes - episode_reward: -268.458 [-508.832, -100.000] - loss: 49.149 - mae: 123.499 - mean_q: 146.369 Interval 477 (238000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3572 3 episodes - episode_reward: -184.647 [-281.897, -124.309] - loss: 63.737 - mae: 121.963 - mean_q: 143.822 Interval 478 (238500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4428 5 episodes - episode_reward: -161.939 [-351.767, -64.980] - loss: 53.356 - mae: 120.619 - mean_q: 141.812 Interval 479 (239000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9788 3 episodes - episode_reward: -215.098 [-312.291, -28.211] - loss: 71.216 - mae: 118.251 - mean_q: 137.979 Interval 480 (239500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4464 4 episodes - episode_reward: -161.006 [-218.951, -100.000] - loss: 64.191 - mae: 117.443 - mean_q: 136.751 Interval 481 (240000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3548 4 episodes - episode_reward: -149.391 [-209.863, -100.000] - loss: 64.924 - mae: 115.862 - mean_q: 133.549 Interval 482 (240500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4131 2 episodes - episode_reward: -142.648 [-181.673, -103.622] - loss: 68.677 - mae: 113.157 - mean_q: 129.731 Interval 483 (241000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4881 1 episodes - episode_reward: -184.912 [-184.912, -184.912] - loss: 45.394 - mae: 113.082 - mean_q: 130.300 Interval 484 (241500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5172 2 episodes - episode_reward: -130.956 [-219.746, -42.166] - loss: 47.287 - mae: 112.224 - mean_q: 129.994 Interval 485 (242000 steps performed) 500/500 [==============================] - 2s 
5ms/step - reward: -0.9699 2 episodes - episode_reward: -262.652 [-267.473, -257.831] - loss: 65.992 - mae: 111.959 - mean_q: 130.560 Interval 486 (242500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4614 1 episodes - episode_reward: -220.084 [-220.084, -220.084] - loss: 59.076 - mae: 111.382 - mean_q: 131.457 Interval 487 (243000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0949 Interval 488 (243500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9981 3 episodes - episode_reward: -189.451 [-232.899, -133.302] - loss: 57.439 - mae: 110.404 - mean_q: 130.877 Interval 489 (244000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8065 1 episodes - episode_reward: -265.159 [-265.159, -265.159] - loss: 73.574 - mae: 110.672 - mean_q: 131.772 Interval 490 (244500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0227 Interval 491 (245000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9361 3 episodes - episode_reward: -363.884 [-525.926, -93.412] - loss: 52.991 - mae: 110.480 - mean_q: 133.144 Interval 492 (245500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0562 Interval 493 (246000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1691 Interval 494 (246500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1926 Interval 495 (247000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.8086 2 episodes - episode_reward: -554.660 [-879.431, -229.889] - loss: 63.156 - mae: 111.277 - mean_q: 136.097 Interval 496 (247500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2076 1 episodes - episode_reward: -119.389 [-119.389, -119.389] - loss: 47.525 - mae: 113.151 - mean_q: 137.961 
Interval 497 (248000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2143 Interval 498 (248500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4114 Interval 499 (249000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1117 1 episodes - episode_reward: -426.297 [-426.297, -426.297] - loss: 50.732 - mae: 119.085 - mean_q: 145.891 Interval 500 (249500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1788 Interval 501 (250000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2191 Interval 502 (250500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6141 1 episodes - episode_reward: -417.841 [-417.841, -417.841] - loss: 67.608 - mae: 132.270 - mean_q: 163.675 Interval 503 (251000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0662 Interval 504 (251500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2901 Interval 505 (252000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9112 1 episodes - episode_reward: -620.504 [-620.504, -620.504] - loss: 79.892 - mae: 144.945 - mean_q: 179.866 Interval 506 (252500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2118 1 episodes - episode_reward: -137.736 [-137.736, -137.736] - loss: 75.332 - mae: 148.344 - mean_q: 184.534 Interval 507 (253000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6413 1 episodes - episode_reward: -317.326 [-317.326, -317.326] - loss: 72.799 - mae: 148.563 - mean_q: 185.903 Interval 508 (253500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0858 Interval 509 (254000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0065 1 episodes - 
episode_reward: -529.641 [-529.641, -529.641] - loss: 98.125 - mae: 159.973 - mean_q: 201.773 Interval 510 (254500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0976 Interval 511 (255000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4703 Interval 512 (255500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9443 1 episodes - episode_reward: -656.797 [-656.797, -656.797] - loss: 96.820 - mae: 173.831 - mean_q: 220.765 Interval 513 (256000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6885 2 episodes - episode_reward: -458.039 [-555.705, -360.373] - loss: 92.781 - mae: 176.939 - mean_q: 223.565 Interval 514 (256500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2620 Interval 515 (257000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1667 Interval 516 (257500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2297 Interval 517 (258000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2094 Interval 518 (258500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2240 Interval 519 (259000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0673 Interval 520 (259500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.3554 Interval 521 (260000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.1891 Interval 522 (260500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1995 Interval 523 (261000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2922 Interval 524 (261500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.2711 Interval 525 (262000 
steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.2285 Interval 526 (262500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.2829 Interval 527 (263000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.7192 Interval 528 (263500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.6938 1 episodes - episode_reward: -2129.110 [-2129.110, -2129.110] - loss: 120.765 - mae: 251.441 - mean_q: 324.423 Interval 529 (264000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5265 Interval 530 (264500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4590 Interval 531 (265000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3358 Interval 532 (265500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4469 Interval 533 (266000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6095 Interval 534 (266500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.9012 3 episodes - episode_reward: -1082.726 [-2078.876, -425.942] - loss: 135.537 - mae: 264.972 - mean_q: 341.255 Interval 535 (267000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8878 1 episodes - episode_reward: -109.477 [-109.477, -109.477] - loss: 162.528 - mae: 261.788 - mean_q: 335.898 Interval 536 (267500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6261 Interval 537 (268000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5139 1 episodes - episode_reward: -1330.284 [-1330.284, -1330.284] - loss: 112.527 - mae: 265.679 - mean_q: 341.254 Interval 538 (268500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4379 Interval 539 (269000 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4174 Interval 540 (269500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3460 Interval 541 (270000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3782 Interval 542 (270500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4508 Interval 543 (271000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4607 Interval 544 (271500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8515 1 episodes - episode_reward: -2054.398 [-2054.398, -2054.398] - loss: 107.955 - mae: 262.060 - mean_q: 337.160 Interval 545 (272000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8514 3 episodes - episode_reward: -289.625 [-464.368, -109.532] - loss: 107.968 - mae: 262.812 - mean_q: 337.983 Interval 546 (272500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5902 2 episodes - episode_reward: -260.258 [-386.469, -134.047] - loss: 126.847 - mae: 264.296 - mean_q: 339.309 Interval 547 (273000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5739 1 episodes - episode_reward: -313.123 [-313.123, -313.123] - loss: 148.666 - mae: 268.169 - mean_q: 345.487 Interval 548 (273500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0564 1 episodes - episode_reward: -35.107 [-35.107, -35.107] - loss: 143.282 - mae: 274.912 - mean_q: 355.926 Interval 549 (274000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2588 2 episodes - episode_reward: -79.713 [-99.632, -59.794] - loss: 148.212 - mae: 284.998 - mean_q: 371.589 Interval 550 (274500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6324 2 episodes - episode_reward: -147.605 
[-235.538, -59.673] - loss: 170.766 - mae: 286.688 - mean_q: 375.576 Interval 551 (275000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3559 Interval 552 (275500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0070 Interval 553 (276000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1353 1 episodes - episode_reward: -203.947 [-203.947, -203.947] - loss: 217.932 - mae: 314.760 - mean_q: 414.921 Interval 554 (276500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4621 1 episodes - episode_reward: 161.574 [161.574, 161.574] - loss: 221.193 - mae: 323.624 - mean_q: 428.035 Interval 555 (277000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5611 1 episodes - episode_reward: -78.303 [-78.303, -78.303] - loss: 226.154 - mae: 338.541 - mean_q: 448.823 Interval 556 (277500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2375 Interval 557 (278000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1805 Interval 558 (278500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.3002 Interval 559 (279000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.4617 Interval 560 (279500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.0452 Interval 561 (280000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: 0.0061 Interval 562 (280500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1841 Interval 563 (281000 steps performed) 500/500 [==============================] - 10s 19ms/step - reward: -0.2012 Interval 564 (281500 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.1529 Interval 565 (282000 steps performed) 500/500 
[==============================] - 11s 22ms/step - reward: -0.3869 Interval 566 (282500 steps performed) 500/500 [==============================] - 11s 23ms/step - reward: -0.1524 Interval 567 (283000 steps performed) 500/500 [==============================] - 12s 24ms/step - reward: -0.1970 Interval 568 (283500 steps performed) 500/500 [==============================] - 13s 25ms/step - reward: -0.1030 Interval 569 (284000 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -0.2875 Interval 570 (284500 steps performed) 500/500 [==============================] - 14s 27ms/step - reward: -0.0809 Interval 571 (285000 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.2121 Interval 572 (285500 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -0.1482 Interval 573 (286000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.1554 Interval 574 (286500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.1543 Interval 575 (287000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.1839 Interval 576 (287500 steps performed) 500/500 [==============================] - 16s 31ms/step - reward: -0.1268 Interval 577 (288000 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -0.1807 Interval 578 (288500 steps performed) 500/500 [==============================] - 17s 34ms/step - reward: -0.1636 Interval 579 (289000 steps performed) 500/500 [==============================] - 18s 35ms/step - reward: -0.1895 Interval 580 (289500 steps performed) 500/500 [==============================] - 19s 38ms/step - reward: -0.2614 Interval 581 (290000 steps performed) 500/500 [==============================] - 19s 37ms/step - reward: -0.2359 Interval 582 (290500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -1.5518 4 episodes - 
episode_reward: -798.244 [-2577.486, -120.658] - loss: 348.454 - mae: 469.376 - mean_q: 633.094 Interval 583 (291000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3539 3 episodes - episode_reward: -227.446 [-282.416, -134.822] - loss: 341.258 - mae: 492.008 - mean_q: 664.041 Interval 584 (291500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8852 1 episodes - episode_reward: -296.683 [-296.683, -296.683] - loss: 419.153 - mae: 509.536 - mean_q: 688.174 Interval 585 (292000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6899 5 episodes - episode_reward: -273.861 [-376.431, -113.258] - loss: 516.201 - mae: 542.477 - mean_q: 734.189 Interval 586 (292500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9713 3 episodes - episode_reward: -339.196 [-384.992, -297.400] - loss: 575.866 - mae: 573.607 - mean_q: 777.874 Interval 587 (293000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6678 2 episodes - episode_reward: -344.193 [-374.147, -314.239] - loss: 575.389 - mae: 612.340 - mean_q: 832.239 Interval 588 (293500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8135 1 episodes - episode_reward: -357.235 [-357.235, -357.235] - loss: 720.722 - mae: 657.863 - mean_q: 896.646 Interval 589 (294000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6457 3 episodes - episode_reward: -267.565 [-394.983, -86.718] - loss: 731.952 - mae: 728.402 - mean_q: 994.914 Interval 590 (294500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2291 6 episodes - episode_reward: -195.348 [-345.048, -7.199] - loss: 1005.134 - mae: 789.331 - mean_q: 1078.945 Interval 591 (295000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8360 4 episodes - episode_reward: -235.196 
[-352.255, -117.678] - loss: 1138.612 - mae: 871.311 - mean_q: 1192.547 Interval 592 (295500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5710 3 episodes - episode_reward: -283.136 [-376.047, -191.077] - loss: 1314.717 - mae: 978.336 - mean_q: 1340.792 Interval 593 (296000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3128 4 episodes - episode_reward: -169.232 [-271.478, -50.367] - loss: 1548.907 - mae: 1058.450 - mean_q: 1450.746 Interval 594 (296500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1905 4 episodes - episode_reward: -272.606 [-354.752, -195.628] - loss: 1602.448 - mae: 1173.583 - mean_q: 1607.120 Interval 595 (297000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4752 4 episodes - episode_reward: -328.798 [-515.106, -186.900] - loss: 2030.512 - mae: 1285.401 - mean_q: 1761.681 Interval 596 (297500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7682 1 episodes - episode_reward: -383.364 [-383.364, -383.364] - loss: 2239.628 - mae: 1430.310 - mean_q: 1960.804 Interval 597 (298000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8790 1 episodes - episode_reward: -396.968 [-396.968, -396.968] - loss: 2262.638 - mae: 1551.479 - mean_q: 2124.639 Interval 598 (298500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9225 1 episodes - episode_reward: -342.362 [-342.362, -342.362] - loss: 2450.041 - mae: 1675.381 - mean_q: 2290.776 Interval 599 (299000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4574 2 episodes - episode_reward: -390.634 [-401.956, -379.311] - loss: 2446.787 - mae: 1819.325 - mean_q: 2483.148 Interval 600 (299500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0856 1 episodes - episode_reward: -504.291 
[-504.291, -504.291] - loss: 2531.140 - mae: 1931.103 - mean_q: 2634.145 Interval 601 (300000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4002 Interval 602 (300500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4776 2 episodes - episode_reward: -458.306 [-636.707, -279.904] - loss: 2671.288 - mae: 2139.194 - mean_q: 2906.297 Interval 603 (301000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2334 Interval 604 (301500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0382 Interval 605 (302000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3997 Interval 606 (302500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8594 4 episodes - episode_reward: -481.323 [-902.408, -153.206] - loss: 3130.312 - mae: 2439.581 - mean_q: 3305.491 Interval 607 (303000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0289 6 episodes - episode_reward: -209.199 [-316.287, -132.272] - loss: 4860.583 - mae: 2496.138 - mean_q: 3383.053 Interval 608 (303500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7646 4 episodes - episode_reward: -548.638 [-857.643, -290.997] - loss: 4907.569 - mae: 2535.681 - mean_q: 3436.769 Interval 609 (304000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6205 3 episodes - episode_reward: -707.622 [-863.705, -593.124] - loss: 5721.349 - mae: 2582.501 - mean_q: 3496.472 Interval 610 (304500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3545 5 episodes - episode_reward: -361.220 [-899.785, -132.563] - loss: 6730.679 - mae: 2634.271 - mean_q: 3566.278 Interval 611 (305000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3239 1 episodes - episode_reward: -530.249 
[-530.249, -530.249] - loss: 8112.797 - mae: 2708.689 - mean_q: 3663.356 Interval 612 (305500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8728 Interval 613 (306000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1026 2 episodes - episode_reward: -473.806 [-808.457, -139.156] - loss: 11631.586 - mae: 2907.700 - mean_q: 3931.868 Interval 614 (306500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7739 Interval 615 (307000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4570 6 episodes - episode_reward: -291.282 [-979.568, -123.008] - loss: 18045.043 - mae: 3155.487 - mean_q: 4261.286 Interval 616 (307500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5852 7 episodes - episode_reward: -197.158 [-277.122, -147.519] - loss: 16939.961 - mae: 3325.193 - mean_q: 4490.371 Interval 617 (308000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6754 6 episodes - episode_reward: -141.048 [-273.972, -20.324] - loss: 19044.834 - mae: 3454.221 - mean_q: 4657.839 Interval 618 (308500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8582 6 episodes - episode_reward: -145.071 [-269.399, -13.940] - loss: 17402.832 - mae: 3613.373 - mean_q: 4876.750 Interval 619 (309000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5481 5 episodes - episode_reward: -96.196 [-210.814, 27.238] - loss: 17350.281 - mae: 3768.463 - mean_q: 5074.074 Interval 620 (309500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0726 Interval 621 (310000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1704 Interval 622 (310500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2981 Interval 623 (311000 steps performed) 
500/500 [==============================] - 4s 7ms/step - reward: -0.0716 Interval 624 (311500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1746 Interval 625 (312000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1747 Interval 626 (312500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2055 Interval 627 (313000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1074 Interval 628 (313500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2074 Interval 629 (314000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1740 Interval 630 (314500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1452 Interval 631 (315000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1383 Interval 632 (315500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.1630 1 episodes - episode_reward: -817.497 [-817.497, -817.497] - loss: 10866.035 - mae: 3845.051 - mean_q: 5183.975 Interval 633 (316000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1076 Interval 634 (316500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1929 Interval 635 (317000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0706 1 episodes - episode_reward: -424.491 [-424.491, -424.491] - loss: 9109.063 - mae: 3699.739 - mean_q: 4985.828 Interval 636 (317500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5054 1 episodes - episode_reward: -353.211 [-353.211, -353.211] - loss: 9990.821 - mae: 3657.490 - mean_q: 4928.265 Interval 637 (318000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0035 Interval 638 (318500 steps performed) 
500/500 [==============================] - 2s 5ms/step - reward: -0.0893 Interval 639 (319000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6228 1 episodes - episode_reward: -413.950 [-413.950, -413.950] - loss: 7609.811 - mae: 3490.210 - mean_q: 4705.191 Interval 640 (319500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0364 Interval 641 (320000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7814 1 episodes - episode_reward: -334.374 [-334.374, -334.374] - loss: 7413.814 - mae: 3399.326 - mean_q: 4579.792 Interval 642 (320500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8464 1 episodes - episode_reward: -544.990 [-544.990, -544.990] - loss: 7777.961 - mae: 3344.949 - mean_q: 4509.802 Interval 643 (321000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3574 Interval 644 (321500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9924 4 episodes - episode_reward: -287.877 [-622.040, -96.856] - loss: 6976.489 - mae: 3237.802 - mean_q: 4365.522 Interval 645 (322000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2595 1 episodes - episode_reward: -636.164 [-636.164, -636.164] - loss: 6144.655 - mae: 3182.505 - mean_q: 4291.509 Interval 646 (322500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1155 Interval 647 (323000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8714 Interval 648 (323500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6786 1 episodes - episode_reward: -935.831 [-935.831, -935.831] - loss: 6700.678 - mae: 3036.406 - mean_q: 4091.534 Interval 649 (324000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4128 1 episodes - episode_reward: -908.496 
[-908.496, -908.496] - loss: 4690.596 - mae: 2953.154 - mean_q: 3980.683 Interval 650 (324500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6492 1 episodes - episode_reward: -764.171 [-764.171, -764.171] - loss: 7944.940 - mae: 2902.629 - mean_q: 3911.831 Interval 651 (325000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7757 1 episodes - episode_reward: -672.833 [-672.833, -672.833] - loss: 6838.857 - mae: 2828.500 - mean_q: 3814.455 Interval 652 (325500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3122 Interval 653 (326000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9861 Interval 654 (326500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3273 1 episodes - episode_reward: -916.656 [-916.656, -916.656] - loss: 6161.486 - mae: 2710.446 - mean_q: 3659.021 Interval 655 (327000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5741 1 episodes - episode_reward: -943.472 [-943.472, -943.472] - loss: 6511.403 - mae: 2677.934 - mean_q: 3615.427 Interval 656 (327500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8724 Interval 657 (328000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7787 2 episodes - episode_reward: -563.956 [-1007.534, -120.379] - loss: 5219.725 - mae: 2603.361 - mean_q: 3517.768 Interval 658 (328500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4709 1 episodes - episode_reward: -692.670 [-692.670, -692.670] - loss: 6464.724 - mae: 2590.404 - mean_q: 3499.290 Interval 659 (329000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0301 2 episodes - episode_reward: -529.777 [-781.289, -278.266] - loss: 3865.974 - mae: 2534.665 - mean_q: 3427.369 Interval 660 (329500 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -1.6776 3 episodes - episode_reward: -406.855 [-837.594, -164.934] - loss: 6763.883 - mae: 2503.279 - mean_q: 3387.333 Interval 661 (330000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6536 Interval 662 (330500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9135 Interval 663 (331000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2173 1 episodes - episode_reward: -2596.497 [-2596.497, -2596.497] - loss: 5553.493 - mae: 2464.716 - mean_q: 3339.614 Interval 664 (331500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1809 1 episodes - episode_reward: -1377.646 [-1377.646, -1377.646] - loss: 6143.546 - mae: 2457.970 - mean_q: 3330.256 Interval 665 (332000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9160 2 episodes - episode_reward: -450.788 [-800.879, -100.698] - loss: 9755.139 - mae: 2464.354 - mean_q: 3336.707 Interval 666 (332500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8763 1 episodes - episode_reward: -840.455 [-840.455, -840.455] - loss: 6450.176 - mae: 2424.044 - mean_q: 3282.058 Interval 667 (333000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0543 2 episodes - episode_reward: -275.988 [-365.224, -186.752] - loss: 5760.505 - mae: 2397.532 - mean_q: 3246.315 Interval 668 (333500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9690 1 episodes - episode_reward: -2411.601 [-2411.601, -2411.601] - loss: 6995.976 - mae: 2368.415 - mean_q: 3207.178 Interval 669 (334000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5903 Interval 670 (334500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7783 1 episodes - episode_reward: 
-1009.490 [-1009.490, -1009.490] - loss: 5074.954 - mae: 2310.099 - mean_q: 3130.019 Interval 671 (335000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0077 4 episodes - episode_reward: -451.139 [-947.353, -160.485] - loss: 4495.154 - mae: 2293.381 - mean_q: 3107.499 Interval 672 (335500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8539 1 episodes - episode_reward: -396.139 [-396.139, -396.139] - loss: 4496.636 - mae: 2279.170 - mean_q: 3086.801 Interval 673 (336000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0503 2 episodes - episode_reward: -804.098 [-814.037, -794.160] - loss: 7224.708 - mae: 2248.480 - mean_q: 3041.664 Interval 674 (336500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7955 1 episodes - episode_reward: -432.656 [-432.656, -432.656] - loss: 5464.462 - mae: 2211.345 - mean_q: 2993.997 Interval 675 (337000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5487 2 episodes - episode_reward: -464.904 [-725.266, -204.542] - loss: 3885.164 - mae: 2179.385 - mean_q: 2950.924 Interval 676 (337500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6558 2 episodes - episode_reward: -421.301 [-664.725, -177.877] - loss: 4820.250 - mae: 2157.759 - mean_q: 2920.070 Interval 677 (338000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5573 1 episodes - episode_reward: -811.486 [-811.486, -811.486] - loss: 7422.184 - mae: 2139.255 - mean_q: 2890.702 Interval 678 (338500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3379 1 episodes - episode_reward: -808.965 [-808.965, -808.965] - loss: 4556.592 - mae: 2097.315 - mean_q: 2837.238 Interval 679 (339000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5131 Interval 680 (339500 steps 
performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4602 1 episodes - episode_reward: -981.482 [-981.482, -981.482] - loss: 5143.781 - mae: 2056.006 - mean_q: 2777.579 Interval 681 (340000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5289 3 episodes - episode_reward: -314.195 [-459.750, -186.789] - loss: 5078.466 - mae: 2041.048 - mean_q: 2756.902 Interval 682 (340500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3631 1 episodes - episode_reward: -597.890 [-597.890, -597.890] - loss: 3185.464 - mae: 1981.542 - mean_q: 2676.965 Interval 683 (341000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3695 1 episodes - episode_reward: -796.572 [-796.572, -796.572] - loss: 4726.920 - mae: 1974.136 - mean_q: 2663.598 Interval 684 (341500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7229 Interval 685 (342000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1836 Interval 686 (342500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2328 Interval 687 (343000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.7991 Interval 688 (343500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.0629 1 episodes - episode_reward: -2564.468 [-2564.468, -2564.468] - loss: 2948.823 - mae: 1786.461 - mean_q: 2407.247 Interval 689 (344000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1647 1 episodes - episode_reward: -688.051 [-688.051, -688.051] - loss: 3625.158 - mae: 1744.683 - mean_q: 2349.535 Interval 690 (344500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3112 2 episodes - episode_reward: -198.009 [-277.538, -118.480] - loss: 2923.167 - mae: 1712.191 - mean_q: 2304.809 Interval 691 (345000 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9797 Interval 692 (345500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.8631 1 episodes - episode_reward: -2765.840 [-2765.840, -2765.840] - loss: 2821.140 - mae: 1629.014 - mean_q: 2191.882 Interval 693 (346000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8868 1 episodes - episode_reward: -155.740 [-155.740, -155.740] - loss: 2631.104 - mae: 1610.979 - mean_q: 2166.529 Interval 694 (346500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6106 Interval 695 (347000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.8883 1 episodes - episode_reward: -3043.768 [-3043.768, -3043.768] - loss: 1792.806 - mae: 1559.425 - mean_q: 2097.155 Interval 696 (347500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8074 1 episodes - episode_reward: -224.813 [-224.813, -224.813] - loss: 2192.125 - mae: 1523.338 - mean_q: 2048.098 Interval 697 (348000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3670 4 episodes - episode_reward: -357.740 [-889.650, -99.873] - loss: 2168.925 - mae: 1495.652 - mean_q: 2010.687 Interval 698 (348500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1448 Interval 699 (349000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4554 Interval 700 (349500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5504 1 episodes - episode_reward: -985.998 [-985.998, -985.998] - loss: 1644.725 - mae: 1414.580 - mean_q: 1901.541 Interval 701 (350000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9355 Interval 702 (350500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3408 2 episodes - 
episode_reward: -824.425 [-971.642, -677.208] - loss: 1757.109 - mae: 1385.274 - mean_q: 1863.293 Interval 703 (351000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8558 3 episodes - episode_reward: -334.271 [-391.863, -288.645] - loss: 2452.477 - mae: 1365.500 - mean_q: 1835.549 Interval 704 (351500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5005 2 episodes - episode_reward: -849.139 [-1381.143, -317.135] - loss: 2043.017 - mae: 1339.858 - mean_q: 1801.779 Interval 705 (352000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1838 Interval 706 (352500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5593 2 episodes - episode_reward: -1112.345 [-1998.367, -226.323] - loss: 1434.796 - mae: 1308.818 - mean_q: 1760.363 Interval 707 (353000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0102 3 episodes - episode_reward: -594.638 [-1289.334, -224.463] - loss: 1884.658 - mae: 1308.827 - mean_q: 1759.703 Interval 708 (353500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4441 1 episodes - episode_reward: -1413.871 [-1413.871, -1413.871] - loss: 1551.699 - mae: 1298.679 - mean_q: 1746.684 Interval 709 (354000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8601 2 episodes - episode_reward: -754.819 [-1320.231, -189.406] - loss: 1340.124 - mae: 1286.672 - mean_q: 1729.609 Interval 710 (354500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6027 1 episodes - episode_reward: -762.721 [-762.721, -762.721] - loss: 1723.969 - mae: 1281.590 - mean_q: 1722.385 Interval 711 (355000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0170 Interval 712 (355500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3714 1 
episodes - episode_reward: -1279.106 [-1279.106, -1279.106] - loss: 2463.493 - mae: 1262.291 - mean_q: 1697.080 Interval 713 (356000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6593 1 episodes - episode_reward: -664.230 [-664.230, -664.230] - loss: 1526.499 - mae: 1260.760 - mean_q: 1695.004 Interval 714 (356500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2095 1 episodes - episode_reward: -694.688 [-694.688, -694.688] - loss: 1925.814 - mae: 1240.458 - mean_q: 1666.974 Interval 715 (357000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1350 Interval 716 (357500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1731 3 episodes - episode_reward: -446.382 [-1022.328, -148.545] - loss: 1790.177 - mae: 1231.004 - mean_q: 1653.162 Interval 717 (358000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3564 Interval 718 (358500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.4319 2 episodes - episode_reward: -1183.981 [-2267.962, -100.000] - loss: 1586.455 - mae: 1215.184 - mean_q: 1633.439 Interval 719 (359000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7163 1 episodes - episode_reward: -1353.879 [-1353.879, -1353.879] - loss: 2013.720 - mae: 1200.819 - mean_q: 1613.018 Interval 720 (359500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9528 1 episodes - episode_reward: -136.138 [-136.138, -136.138] - loss: 1144.876 - mae: 1183.196 - mean_q: 1589.578 Interval 721 (360000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6527 1 episodes - episode_reward: -975.785 [-975.785, -975.785] - loss: 1360.078 - mae: 1166.958 - mean_q: 1567.448 Interval 722 (360500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 
-2.0783 2 episodes - episode_reward: -619.376 [-1056.068, -182.683] - loss: 1177.002 - mae: 1151.220 - mean_q: 1545.583 Interval 723 (361000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4987 1 episodes - episode_reward: -123.633 [-123.633, -123.633] - loss: 1367.888 - mae: 1129.927 - mean_q: 1517.468 Interval 724 (361500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0248 Interval 725 (362000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2382 Interval 726 (362500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2211 Interval 727 (363000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0981 Interval 728 (363500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4388 Interval 729 (364000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0352 Interval 730 (364500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1854 Interval 731 (365000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2170 Interval 732 (365500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6183 2 episodes - episode_reward: -553.286 [-997.536, -109.036] - loss: 1082.857 - mae: 999.163 - mean_q: 1343.282 Interval 733 (366000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4598 2 episodes - episode_reward: -158.721 [-163.534, -153.909] - loss: 985.223 - mae: 987.647 - mean_q: 1327.677 Interval 734 (366500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5822 1 episodes - episode_reward: -32.863 [-32.863, -32.863] - loss: 1278.359 - mae: 977.457 - mean_q: 1313.022 Interval 735 (367000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2141 
Interval 736 (367500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6471 1 episodes - episode_reward: -411.399 [-411.399, -411.399] - loss: 1445.302 - mae: 986.322 - mean_q: 1325.959 Interval 737 (368000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1020 Interval 738 (368500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3815 Interval 739 (369000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0139 Interval 740 (369500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1312 Interval 741 (370000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2474 Interval 742 (370500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2832 Interval 743 (371000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1045 Interval 744 (371500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1275 Interval 745 (372000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2208 Interval 746 (372500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2137 Interval 747 (373000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1599 Interval 748 (373500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1079 Interval 749 (374000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.3682 Interval 750 (374500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.0764 Interval 751 (375000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2336 Interval 752 (375500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: 
-0.1145 Interval 753 (376000 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.2053 Interval 754 (376500 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.2978 Interval 755 (377000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: 0.0862 Interval 756 (377500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.0961 Interval 757 (378000 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.2083 Interval 758 (378500 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.4516 1 episodes - episode_reward: -1936.003 [-1936.003, -1936.003] - loss: 1506.927 - mae: 961.449 - mean_q: 1287.619 Interval 759 (379000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0735 1 episodes - episode_reward: 2.157 [2.157, 2.157] - loss: 1183.957 - mae: 949.421 - mean_q: 1271.177 Interval 760 (379500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1448 Interval 761 (380000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1799 Interval 762 (380500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1609 Interval 763 (381000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1647 2 episodes - episode_reward: -199.161 [-291.961, -106.360] - loss: 1146.338 - mae: 875.408 - mean_q: 1170.304 Interval 764 (381500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2473 2 episodes - episode_reward: -41.415 [-48.858, -33.972] - loss: 773.553 - mae: 856.128 - mean_q: 1144.619 Interval 765 (382000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0432 2 episodes - episode_reward: -251.359 [-368.781, -133.937] - loss: 1099.180 - mae: 845.711 - mean_q: 1130.425 
Interval 766 (382500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7092 3 episodes - episode_reward: -151.344 [-192.510, -125.878] - loss: 919.236 - mae: 813.506 - mean_q: 1087.539 Interval 767 (383000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7230 Interval 768 (383500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2500 1 episodes - episode_reward: -372.919 [-372.919, -372.919] - loss: 811.013 - mae: 779.582 - mean_q: 1042.983 Interval 769 (384000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2056 Interval 770 (384500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5367 1 episodes - episode_reward: -203.168 [-203.168, -203.168] - loss: 683.976 - mae: 739.211 - mean_q: 989.238 Interval 771 (385000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1306 2 episodes - episode_reward: -390.948 [-409.004, -372.891] - loss: 646.163 - mae: 720.849 - mean_q: 964.906 Interval 772 (385500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1376 3 episodes - episode_reward: -191.712 [-386.552, -88.585] - loss: 661.347 - mae: 693.312 - mean_q: 928.234 Interval 773 (386000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6892 2 episodes - episode_reward: -167.195 [-237.530, -96.860] - loss: 657.708 - mae: 669.060 - mean_q: 896.005 Interval 774 (386500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5525 4 episodes - episode_reward: -159.013 [-272.798, -40.987] - loss: 678.949 - mae: 654.800 - mean_q: 876.748 Interval 775 (387000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5817 2 episodes - episode_reward: -79.357 [-80.117, -78.598] - loss: 546.765 - mae: 639.500 - mean_q: 856.745 Interval 776 (387500 steps 
performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2750 1 episodes - episode_reward: -2530.255 [-2530.255, -2530.255] - loss: 585.854 - mae: 619.163 - mean_q: 829.623 Interval 777 (388000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.1815 1 episodes - episode_reward: -4493.511 [-4493.511, -4493.511] - loss: 668.329 - mae: 595.800 - mean_q: 798.151 Interval 778 (388500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2709 1 episodes - episode_reward: -1450.564 [-1450.564, -1450.564] - loss: 639.257 - mae: 578.775 - mean_q: 775.739 Interval 779 (389000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2380 Interval 780 (389500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -10.2461 1 episodes - episode_reward: -6977.567 [-6977.567, -6977.567] - loss: 475.717 - mae: 545.161 - mean_q: 731.108 Interval 781 (390000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9587 1 episodes - episode_reward: -3429.039 [-3429.039, -3429.039] - loss: 409.485 - mae: 529.176 - mean_q: 709.861 Interval 782 (390500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1132 Interval 783 (391000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8591 1 episodes - episode_reward: -1460.548 [-1460.548, -1460.548] - loss: 450.650 - mae: 497.209 - mean_q: 666.671 Interval 784 (391500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1592 1 episodes - episode_reward: -1401.552 [-1401.552, -1401.552] - loss: 446.194 - mae: 480.606 - mean_q: 643.998 Interval 785 (392000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6231 Interval 786 (392500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3491 3 episodes - 
episode_reward: -433.403 [-1068.052, -98.468] - loss: 385.690 - mae: 453.065 - mean_q: 607.607 Interval 787 (393000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8240 1 episodes - episode_reward: -696.115 [-696.115, -696.115] - loss: 287.197 - mae: 438.896 - mean_q: 588.946 Interval 788 (393500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4381 Interval 789 (394000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7439 1 episodes - episode_reward: -406.286 [-406.286, -406.286] - loss: 339.901 - mae: 419.089 - mean_q: 562.395 Interval 790 (394500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8774 2 episodes - episode_reward: -352.533 [-452.684, -252.382] - loss: 280.029 - mae: 408.391 - mean_q: 547.584 Interval 791 (395000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9242 2 episodes - episode_reward: -191.560 [-218.530, -164.590] - loss: 253.150 - mae: 403.965 - mean_q: 541.974 Interval 792 (395500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4444 4 episodes - episode_reward: -199.996 [-246.407, -146.220] - loss: 240.683 - mae: 399.303 - mean_q: 535.341 Interval 793 (396000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0175 1 episodes - episode_reward: -851.430 [-851.430, -851.430] - loss: 272.172 - mae: 387.810 - mean_q: 519.575 Interval 794 (396500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8214 5 episodes - episode_reward: -442.635 [-978.326, -100.000] - loss: 270.058 - mae: 378.859 - mean_q: 507.829 Interval 795 (397000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2490 4 episodes - episode_reward: -491.822 [-1357.345, -29.368] - loss: 247.076 - mae: 371.962 - mean_q: 498.777 Interval 796 (397500 steps performed) 
500/500 [==============================] - 2s 5ms/step - reward: -1.3697 1 episodes - episode_reward: -693.742 [-693.742, -693.742] - loss: 293.723 - mae: 364.939 - mean_q: 489.127 Interval 797 (398000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7838 2 episodes - episode_reward: -211.471 [-264.670, -158.273] - loss: 271.704 - mae: 355.366 - mean_q: 475.966 Interval 798 (398500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5212 1 episodes - episode_reward: -53.160 [-53.160, -53.160] - loss: 257.114 - mae: 344.869 - mean_q: 462.039 Interval 799 (399000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2940 4 episodes - episode_reward: -300.006 [-785.841, -100.000] - loss: 264.674 - mae: 336.272 - mean_q: 450.615 Interval 800 (399500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9656 2 episodes - episode_reward: -523.412 [-853.352, -193.473] - loss: 258.768 - mae: 326.907 - mean_q: 437.727 Interval 801 (400000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6206 Interval 802 (400500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2541 1 episodes - episode_reward: -839.074 [-839.074, -839.074] - loss: 207.795 - mae: 313.603 - mean_q: 420.035 Interval 803 (401000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2100 Interval 804 (401500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8299 Interval 805 (402000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4728 3 episodes - episode_reward: -806.538 [-1828.704, -99.226] - loss: 208.050 - mae: 294.466 - mean_q: 394.279 Interval 806 (402500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1361 Interval 807 (403000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.1131 Interval 808 (403500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1723 Interval 809 (404000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1661 Interval 810 (404500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1085 Interval 811 (405000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2116 Interval 812 (405500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1299 Interval 813 (406000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1363 Interval 814 (406500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1839 Interval 815 (407000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.1892 Interval 816 (407500 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1492 Interval 817 (408000 steps performed) 500/500 [==============================] - 9s 19ms/step - reward: -0.1780 Interval 818 (408500 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.2540 Interval 819 (409000 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.0464 Interval 820 (409500 steps performed) 500/500 [==============================] - 11s 21ms/step - reward: -0.2120 Interval 821 (410000 steps performed) 500/500 [==============================] - 11s 23ms/step - reward: -0.1510 Interval 822 (410500 steps performed) 500/500 [==============================] - 13s 25ms/step - reward: -0.1685 Interval 823 (411000 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -0.1698 Interval 824 (411500 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.2114 Interval 825 (412000 steps 
performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.1753 Interval 826 (412500 steps performed) 500/500 [==============================] - 14s 29ms/step - reward: -0.2091 Interval 827 (413000 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.1763 Interval 828 (413500 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -0.1587 Interval 829 (414000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.1603 Interval 830 (414500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -0.2622 Interval 831 (415000 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -0.1613 Interval 832 (415500 steps performed) 500/500 [==============================] - 10s 21ms/step - reward: -0.8986 1 episodes - episode_reward: -2637.625 [-2637.625, -2637.625] - loss: 102.168 - mae: 221.421 - mean_q: 295.259 Interval 833 (416000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0074 Interval 834 (416500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6747 1 episodes - episode_reward: -373.877 [-373.877, -373.877] - loss: 84.112 - mae: 214.491 - mean_q: 285.650 Interval 835 (417000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0135 1 episodes - episode_reward: -477.225 [-477.225, -477.225] - loss: 92.868 - mae: 210.714 - mean_q: 280.243 Interval 836 (417500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1864 1 episodes - episode_reward: -511.824 [-511.824, -511.824] - loss: 104.800 - mae: 206.615 - mean_q: 274.499 Interval 837 (418000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3396 2 episodes - episode_reward: -366.750 [-500.766, -232.735] - loss: 83.910 - mae: 202.398 - mean_q: 268.977 Interval 838 (418500 steps 
performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6478 3 episodes - episode_reward: -138.745 [-195.671, -107.657] - loss: 89.927 - mae: 198.743 - mean_q: 263.760 Interval 839 (419000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1213 Interval 840 (419500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1601 Interval 841 (420000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1943 Interval 842 (420500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1494 Interval 843 (421000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.1057 2 episodes - episode_reward: -633.859 [-815.860, -451.859] - loss: 69.330 - mae: 180.779 - mean_q: 239.051 Interval 844 (421500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0627 1 episodes - episode_reward: -1.641 [-1.641, -1.641] - loss: 83.162 - mae: 175.765 - mean_q: 232.320 Interval 845 (422000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9022 2 episodes - episode_reward: -210.193 [-642.573, 222.188] - loss: 75.933 - mae: 172.656 - mean_q: 228.353 Interval 846 (422500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0680 Interval 847 (423000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2358 Interval 848 (423500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0036 Interval 849 (424000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3380 Interval 850 (424500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2447 Interval 851 (425000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1687 2 episodes - episode_reward: -465.519 [-753.309, 
-177.728] - loss: 66.224 - mae: 156.187 - mean_q: 206.767 Interval 852 (425500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9321 2 episodes - episode_reward: -563.041 [-698.399, -427.684] - loss: 63.276 - mae: 151.873 - mean_q: 200.934 Interval 853 (426000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3846 3 episodes - episode_reward: -212.997 [-525.363, -33.338] - loss: 57.070 - mae: 147.993 - mean_q: 195.994 Interval 854 (426500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8812 1 episodes - episode_reward: -954.477 [-954.477, -954.477] - loss: 60.894 - mae: 142.488 - mean_q: 188.587 Interval 855 (427000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2977 Interval 856 (427500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2390 Interval 857 (428000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0479 Interval 858 (428500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9835 1 episodes - episode_reward: -670.246 [-670.246, -670.246] - loss: 42.500 - mae: 129.904 - mean_q: 172.300 Interval 859 (429000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1681 Interval 860 (429500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1559 1 episodes - episode_reward: 146.611 [146.611, 146.611] - loss: 44.136 - mae: 124.294 - mean_q: 164.998 Interval 861 (430000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1302 Interval 862 (430500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1985 Interval 863 (431000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2158 Interval 864 (431500 steps performed) 500/500 [==============================] - 
4s 9ms/step - reward: -0.1691 Interval 865 (432000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6751 1 episodes - episode_reward: -644.634 [-644.634, -644.634] - loss: 32.300 - mae: 112.365 - mean_q: 148.915 Interval 866 (432500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8406 2 episodes - episode_reward: -181.589 [-182.624, -180.555] - loss: 36.843 - mae: 110.196 - mean_q: 146.117 Interval 867 (433000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2419 2 episodes - episode_reward: -322.250 [-431.846, -212.654] - loss: 37.495 - mae: 108.953 - mean_q: 144.425 Interval 868 (433500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0686 Interval 869 (434000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2203 Interval 870 (434500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0204 Interval 871 (435000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.3776 Interval 872 (435500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.0951 Interval 873 (436000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.1067 Interval 874 (436500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1594 Interval 875 (437000 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.1840 Interval 876 (437500 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.2252 Interval 877 (438000 steps performed) 500/500 [==============================] - 11s 22ms/step - reward: -0.1004 Interval 878 (438500 steps performed) 500/500 [==============================] - 11s 21ms/step - reward: -0.2355 Interval 879 (439000 steps performed) 500/500 [==============================] - 11s 
21ms/step - reward: -0.1306 Interval 880 (439500 steps performed) 500/500 [==============================] - 11s 23ms/step - reward: -0.1461 Interval 881 (440000 steps performed) 500/500 [==============================] - 13s 25ms/step - reward: -0.1993 Interval 882 (440500 steps performed) 500/500 [==============================] - 9s 19ms/step - reward: -0.6681 2 episodes - episode_reward: -746.618 [-1378.754, -114.482] - loss: 19.835 - mae: 81.837 - mean_q: 108.110 Interval 883 (441000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0737 Interval 884 (441500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2592 Interval 885 (442000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0928 Interval 886 (442500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1902 Interval 887 (443000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.0834 Interval 888 (443500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3212 2 episodes - episode_reward: -256.307 [-291.520, -221.094] - loss: 16.517 - mae: 73.374 - mean_q: 97.166 Interval 889 (444000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3727 1 episodes - episode_reward: 179.744 [179.744, 179.744] - loss: 19.802 - mae: 71.550 - mean_q: 94.571 Interval 890 (444500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1385 4 episodes - episode_reward: -141.288 [-180.772, -88.290] - loss: 16.733 - mae: 70.245 - mean_q: 93.058 Interval 891 (445000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1742 1 episodes - episode_reward: -140.405 [-140.405, -140.405] - loss: 15.307 - mae: 69.662 - mean_q: 92.262 Interval 892 (445500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7115 
2 episodes - episode_reward: -170.491 [-218.791, -122.191] - loss: 15.357 - mae: 68.486 - mean_q: 90.757 Interval 893 (446000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7653 2 episodes - episode_reward: -210.281 [-250.115, -170.447] - loss: 19.319 - mae: 67.411 - mean_q: 89.298 Interval 894 (446500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0230 Interval 895 (447000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5364 4 episodes - episode_reward: -46.400 [-133.900, 184.541] - loss: 14.370 - mae: 65.849 - mean_q: 87.340 Interval 896 (447500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1507 3 episodes - episode_reward: -199.940 [-234.863, -152.978] - loss: 14.362 - mae: 64.754 - mean_q: 85.901 Interval 897 (448000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4855 1 episodes - episode_reward: -294.300 [-294.300, -294.300] - loss: 13.060 - mae: 63.507 - mean_q: 84.205 Interval 898 (448500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0378 Interval 899 (449000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0465 2 episodes - episode_reward: 36.515 [-134.417, 207.448] - loss: 12.620 - mae: 60.937 - mean_q: 80.770 Interval 900 (449500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3486 1 episodes - episode_reward: -139.908 [-139.908, -139.908] - loss: 11.320 - mae: 60.010 - mean_q: 79.551 Interval 901 (450000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7276 2 episodes - episode_reward: -176.062 [-187.645, -164.479] - loss: 12.785 - mae: 58.269 - mean_q: 77.138 Interval 902 (450500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1445 Interval 903 (451000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -1.3430 5 episodes - episode_reward: -149.600 [-208.649, -100.000] - loss: 12.333 - mae: 54.922 - mean_q: 72.614 Interval 904 (451500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1151 Interval 905 (452000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8519 5 episodes - episode_reward: -75.576 [-200.239, 190.374] - loss: 12.557 - mae: 53.603 - mean_q: 71.058 Interval 906 (452500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0489 Interval 907 (453000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2768 1 episodes - episode_reward: 119.192 [119.192, 119.192] - loss: 10.106 - mae: 51.641 - mean_q: 68.352 Interval 908 (453500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6554 2 episodes - episode_reward: -189.870 [-204.284, -175.456] - loss: 11.329 - mae: 50.290 - mean_q: 66.416 Interval 909 (454000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5003 2 episodes - episode_reward: -123.334 [-160.262, -86.405] - loss: 13.476 - mae: 49.544 - mean_q: 65.448 Interval 910 (454500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8116 3 episodes - episode_reward: -138.211 [-192.103, -100.815] - loss: 10.470 - mae: 48.445 - mean_q: 63.919 Interval 911 (455000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2306 Interval 912 (455500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0972 5 episodes - episode_reward: -194.784 [-268.936, -134.701] - loss: 15.368 - mae: 46.188 - mean_q: 60.794 Interval 913 (456000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0309 4 episodes - episode_reward: -414.688 [-738.032, -198.748] - loss: 11.597 - mae: 45.091 - mean_q: 
59.228 Interval 914 (456500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8913 4 episodes - episode_reward: -102.285 [-134.628, -43.623] - loss: 13.219 - mae: 44.080 - mean_q: 57.832 Interval 915 (457000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0887 3 episodes - episode_reward: -129.796 [-203.733, -27.711] - loss: 16.465 - mae: 42.977 - mean_q: 56.321 Interval 916 (457500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7989 6 episodes - episode_reward: -253.210 [-382.175, -85.604] - loss: 16.067 - mae: 41.625 - mean_q: 54.417 Interval 917 (458000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6505 7 episodes - episode_reward: -205.230 [-387.495, -84.228] - loss: 15.378 - mae: 40.750 - mean_q: 53.287 Interval 918 (458500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6314 2 episodes - episode_reward: -145.345 [-190.689, -100.000] - loss: 14.225 - mae: 40.211 - mean_q: 52.538 Interval 919 (459000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3803 1 episodes - episode_reward: -195.646 [-195.646, -195.646] - loss: 13.545 - mae: 39.462 - mean_q: 51.489 Interval 920 (459500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6755 3 episodes - episode_reward: -260.394 [-411.863, -181.273] - loss: 16.613 - mae: 38.672 - mean_q: 50.341 Interval 921 (460000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9908 3 episodes - episode_reward: -170.039 [-244.083, -107.134] - loss: 19.127 - mae: 38.006 - mean_q: 49.249 Interval 922 (460500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3277 1 episodes - episode_reward: -178.113 [-178.113, -178.113] - loss: 15.171 - mae: 37.328 - mean_q: 48.338 Interval 923 (461000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -1.5024 3 episodes - episode_reward: -230.088 [-384.513, -111.803] - loss: 17.364 - mae: 36.659 - mean_q: 47.377 Interval 924 (461500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7580 2 episodes - episode_reward: -231.518 [-241.532, -221.504] - loss: 16.549 - mae: 36.103 - mean_q: 46.558 Interval 925 (462000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0963 Interval 926 (462500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0790 Interval 927 (463000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8909 4 episodes - episode_reward: -124.506 [-253.846, 76.654] - loss: 15.665 - mae: 34.495 - mean_q: 44.362 Interval 928 (463500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1599 1 episodes - episode_reward: -92.467 [-92.467, -92.467] - loss: 19.055 - mae: 34.094 - mean_q: 43.653 Interval 929 (464000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2676 3 episodes - episode_reward: -360.551 [-522.217, -103.790] - loss: 18.926 - mae: 33.616 - mean_q: 43.090 Interval 930 (464500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2032 4 episodes - episode_reward: -408.893 [-516.930, -268.138] - loss: 19.415 - mae: 32.832 - mean_q: 41.910 Interval 931 (465000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0227 Interval 932 (465500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1495 Interval 933 (466000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1981 Interval 934 (466500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -1.0970 2 episodes - episode_reward: -339.439 [-345.428, -333.451] - loss: 15.356 - mae: 31.788 
- mean_q: 40.491 Interval 935 (467000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6110 3 episodes - episode_reward: -301.433 [-357.482, -196.736] - loss: 16.763 - mae: 31.163 - mean_q: 39.525 Interval 936 (467500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6618 3 episodes - episode_reward: -106.090 [-141.886, -76.383] - loss: 14.483 - mae: 30.578 - mean_q: 38.566 Interval 937 (468000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6955 2 episodes - episode_reward: -299.704 [-466.049, -133.359] - loss: 18.889 - mae: 30.361 - mean_q: 38.200 Interval 938 (468500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1931 1 episodes - episode_reward: -320.770 [-320.770, -320.770] - loss: 17.452 - mae: 29.737 - mean_q: 37.386 Interval 939 (469000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1053 Interval 940 (469500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0090 Interval 941 (470000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0347 Interval 942 (470500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0610 Interval 943 (471000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0077 Interval 944 (471500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0392 Interval 945 (472000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2786 1 episodes - episode_reward: -5.880 [-5.880, -5.880] - loss: 17.632 - mae: 25.674 - mean_q: 31.674 Interval 946 (472500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0901 Interval 947 (473000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1925 1 episodes - 
episode_reward: -143.477 [-143.477, -143.477] - loss: 18.944 - mae: 24.745 - mean_q: 30.339 Interval 948 (473500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1574 1 episodes - episode_reward: -90.933 [-90.933, -90.933] - loss: 14.398 - mae: 24.395 - mean_q: 29.907 Interval 949 (474000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1684 Interval 950 (474500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1688 Interval 951 (475000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2032 Interval 952 (475500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3208 1 episodes - episode_reward: -77.143 [-77.143, -77.143] - loss: 15.935 - mae: 23.267 - mean_q: 28.169 Interval 953 (476000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1282 Interval 954 (476500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3781 1 episodes - episode_reward: 81.604 [81.604, 81.604] - loss: 17.529 - mae: 23.093 - mean_q: 27.912 Interval 955 (477000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0815 Interval 956 (477500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1600 Interval 957 (478000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1105 Interval 958 (478500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1585 1 episodes - episode_reward: 14.015 [14.015, 14.015] - loss: 15.549 - mae: 22.756 - mean_q: 27.558 Interval 959 (479000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3418 Interval 960 (479500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4190 1 episodes - episode_reward: -674.374 [-674.374, -674.374] - 
loss: 14.255 - mae: 22.094 - mean_q: 26.642 Interval 961 (480000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9501 2 episodes - episode_reward: -608.991 [-784.143, -433.839] - loss: 14.123 - mae: 21.788 - mean_q: 26.049 Interval 962 (480500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7722 1 episodes - episode_reward: -423.817 [-423.817, -423.817] - loss: 12.145 - mae: 21.755 - mean_q: 25.845 Interval 963 (481000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1402 Interval 964 (481500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9726 1 episodes - episode_reward: -623.806 [-623.806, -623.806] - loss: 14.634 - mae: 21.699 - mean_q: 25.643 Interval 965 (482000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9915 3 episodes - episode_reward: -1134.929 [-2778.653, -218.052] - loss: 17.061 - mae: 21.359 - mean_q: 24.883 Interval 966 (482500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0210 2 episodes - episode_reward: -497.749 [-638.545, -356.953] - loss: 16.138 - mae: 21.194 - mean_q: 24.618 Interval 967 (483000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7949 1 episodes - episode_reward: -420.008 [-420.008, -420.008] - loss: 14.242 - mae: 20.823 - mean_q: 23.942 Interval 968 (483500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8112 3 episodes - episode_reward: -112.974 [-298.360, 143.471] - loss: 16.600 - mae: 21.001 - mean_q: 24.210 Interval 969 (484000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4249 1 episodes - episode_reward: -203.179 [-203.179, -203.179] - loss: 17.466 - mae: 20.891 - mean_q: 23.844 Interval 970 (484500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1925 1 
episodes - episode_reward: -192.845 [-192.845, -192.845] - loss: 17.660 - mae: 20.906 - mean_q: 23.600 Interval 971 (485000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5924 Interval 972 (485500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7022 3 episodes - episode_reward: -211.086 [-407.357, -100.000] - loss: 18.435 - mae: 20.615 - mean_q: 23.179 Interval 973 (486000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0525 2 episodes - episode_reward: 23.832 [-125.951, 173.615] - loss: 20.328 - mae: 21.035 - mean_q: 23.522 Interval 974 (486500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9633 2 episodes - episode_reward: -512.632 [-554.423, -470.840] - loss: 13.307 - mae: 21.686 - mean_q: 24.550 Interval 975 (487000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1955 1 episodes - episode_reward: -659.030 [-659.030, -659.030] - loss: 17.308 - mae: 22.215 - mean_q: 25.184 Interval 976 (487500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0835 1 episodes - episode_reward: -485.978 [-485.978, -485.978] - loss: 17.046 - mae: 23.231 - mean_q: 26.599 Interval 977 (488000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7397 2 episodes - episode_reward: -442.970 [-592.768, -293.171] - loss: 15.486 - mae: 24.149 - mean_q: 27.635 Interval 978 (488500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7553 1 episodes - episode_reward: -356.209 [-356.209, -356.209] - loss: 18.356 - mae: 25.063 - mean_q: 28.700 Interval 979 (489000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1386 Interval 980 (489500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2074 Interval 981 (490000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.0443 Interval 982 (490500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1354 2 episodes - episode_reward: -601.448 [-1165.440, -37.456] - loss: 17.074 - mae: 26.491 - mean_q: 30.586 Interval 983 (491000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1790 Interval 984 (491500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1519 Interval 985 (492000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1840 Interval 986 (492500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2134 Interval 987 (493000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1401 Interval 988 (493500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1799 Interval 989 (494000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1904 Interval 990 (494500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3205 Interval 991 (495000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9923 3 episodes - episode_reward: -378.980 [-757.587, -180.966] - loss: 16.453 - mae: 28.137 - mean_q: 32.705 Interval 992 (495500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1688 1 episodes - episode_reward: 92.480 [92.480, 92.480] - loss: 14.817 - mae: 28.138 - mean_q: 32.550 Interval 993 (496000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0826 Interval 994 (496500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1805 Interval 995 (497000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2110 Interval 996 (497500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: 0.2942 1 episodes - episode_reward: -107.448 [-107.448, -107.448] - loss: 14.136 - mae: 28.783 - mean_q: 34.023 Interval 997 (498000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0993 Interval 998 (498500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1438 Interval 999 (499000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1318 Interval 1000 (499500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3044 Interval 1001 (500000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0021 Interval 1002 (500500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0967 Interval 1003 (501000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0071 2 episodes - episode_reward: -197.896 [-244.346, -151.446] - loss: 14.259 - mae: 28.950 - mean_q: 34.692 Interval 1004 (501500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0911 Interval 1005 (502000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2087 Interval 1006 (502500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2666 1 episodes - episode_reward: 17.944 [17.944, 17.944] - loss: 12.733 - mae: 28.845 - mean_q: 34.743 Interval 1007 (503000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0416 Interval 1008 (503500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1294 Interval 1009 (504000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5534 3 episodes - episode_reward: -138.543 [-237.459, 35.016] - loss: 14.741 - mae: 28.973 - mean_q: 35.036 Interval 1010 (504500 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -0.1347 Interval 1011 (505000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0481 Interval 1012 (505500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3650 1 episodes - episode_reward: 79.855 [79.855, 79.855] - loss: 13.237 - mae: 29.256 - mean_q: 35.464 Interval 1013 (506000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1247 Interval 1014 (506500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2044 Interval 1015 (507000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.2136 1 episodes - episode_reward: -9.062 [-9.062, -9.062] - loss: 10.576 - mae: 29.066 - mean_q: 35.606 Interval 1016 (507500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0352 Interval 1017 (508000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1636 Interval 1018 (508500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1022 1 episodes - episode_reward: 39.026 [39.026, 39.026] - loss: 10.647 - mae: 28.909 - mean_q: 36.009 Interval 1019 (509000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6511 3 episodes - episode_reward: -136.538 [-237.047, -33.096] - loss: 9.064 - mae: 28.704 - mean_q: 35.514 Interval 1020 (509500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2167 1 episodes - episode_reward: 190.205 [190.205, 190.205] - loss: 9.180 - mae: 28.420 - mean_q: 35.305 Interval 1021 (510000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0227 Interval 1022 (510500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2172 1 episodes - episode_reward: -129.541 [-129.541, -129.541] - loss: 7.859 - mae: 28.072 - 
mean_q: 34.987 Interval 1023 (511000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0852 Interval 1024 (511500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0370 Interval 1025 (512000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3015 1 episodes - episode_reward: 58.240 [58.240, 58.240] - loss: 9.699 - mae: 27.321 - mean_q: 33.848 Interval 1026 (512500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2890 3 episodes - episode_reward: -27.803 [-149.267, 165.859] - loss: 9.340 - mae: 27.388 - mean_q: 34.041 Interval 1027 (513000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3246 1 episodes - episode_reward: -235.746 [-235.746, -235.746] - loss: 9.921 - mae: 27.409 - mean_q: 33.884 Interval 1028 (513500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3618 2 episodes - episode_reward: 4.225 [-167.354, 175.805] - loss: 9.042 - mae: 27.380 - mean_q: 33.967 Interval 1029 (514000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1851 1 episodes - episode_reward: -294.763 [-294.763, -294.763] - loss: 8.793 - mae: 27.421 - mean_q: 33.924 Interval 1030 (514500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4824 1 episodes - episode_reward: 175.698 [175.698, 175.698] - loss: 6.783 - mae: 27.191 - mean_q: 33.788 Interval 1031 (515000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3622 1 episodes - episode_reward: 262.990 [262.990, 262.990] - loss: 6.493 - mae: 26.861 - mean_q: 33.454 Interval 1032 (515500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0881 Interval 1033 (516000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3081 1 episodes - episode_reward: 132.805 [132.805, 
132.805] - loss: 9.536 - mae: 26.935 - mean_q: 33.534 Interval 1034 (516500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1791 Interval 1035 (517000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1567 Interval 1036 (517500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2005 Interval 1037 (518000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1706 Interval 1038 (518500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1757 Interval 1039 (519000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2176 Interval 1040 (519500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2351 Interval 1041 (520000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0619 Interval 1042 (520500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.0210 Interval 1043 (521000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0049 2 episodes - episode_reward: -377.562 [-551.305, -203.819] - loss: 6.302 - mae: 27.803 - mean_q: 34.929 Interval 1044 (521500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0826 Interval 1045 (522000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0252 2 episodes - episode_reward: 35.844 [-88.174, 159.862] - loss: 6.713 - mae: 27.923 - mean_q: 35.166 Interval 1046 (522500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2727 5 episodes - episode_reward: -130.562 [-192.205, -84.885] - loss: 7.810 - mae: 28.175 - mean_q: 35.264 Interval 1047 (523000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0659 Interval 1048 (523500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -0.8915 3 episodes - episode_reward: -175.173 [-238.575, -139.133] - loss: 7.595 - mae: 28.504 - mean_q: 35.553 Interval 1049 (524000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0360 1 episodes - episode_reward: -48.854 [-48.854, -48.854] - loss: 7.083 - mae: 28.737 - mean_q: 35.892 Interval 1050 (524500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2488 2 episodes - episode_reward: 97.709 [-16.383, 211.801] - loss: 7.213 - mae: 29.105 - mean_q: 36.357 Interval 1051 (525000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7469 3 episodes - episode_reward: -111.695 [-155.569, -64.439] - loss: 7.902 - mae: 29.463 - mean_q: 36.830 Interval 1052 (525500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1276 Interval 1053 (526000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3258 3 episodes - episode_reward: -58.040 [-241.464, 165.258] - loss: 6.348 - mae: 29.441 - mean_q: 36.754 Interval 1054 (526500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2540 9 episodes - episode_reward: -287.223 [-538.055, -70.449] - loss: 8.918 - mae: 29.335 - mean_q: 36.306 Interval 1055 (527000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4200 2 episodes - episode_reward: -79.448 [-331.726, 172.829] - loss: 9.386 - mae: 29.503 - mean_q: 36.392 Interval 1056 (527500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5094 7 episodes - episode_reward: -318.769 [-494.067, -99.434] - loss: 7.382 - mae: 29.513 - mean_q: 36.281 Interval 1057 (528000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3643 4 episodes - episode_reward: -322.740 [-669.270, -167.962] - loss: 9.568 - mae: 29.734 - mean_q: 36.587 
Interval 1058 (528500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0235 Interval 1059 (529000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4633 2 episodes - episode_reward: -97.128 [-342.694, 148.439] - loss: 9.582 - mae: 29.611 - mean_q: 36.190 Interval 1060 (529500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0064 Interval 1061 (530000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2867 1 episodes - episode_reward: 122.621 [122.621, 122.621] - loss: 8.455 - mae: 29.750 - mean_q: 36.612 Interval 1062 (530500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0075 Interval 1063 (531000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2772 1 episodes - episode_reward: 184.512 [184.512, 184.512] - loss: 8.881 - mae: 30.175 - mean_q: 37.250 Interval 1064 (531500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5893 4 episodes - episode_reward: -447.666 [-592.026, -248.863] - loss: 9.356 - mae: 30.314 - mean_q: 37.498 Interval 1065 (532000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1554 1 episodes - episode_reward: -159.728 [-159.728, -159.728] - loss: 10.424 - mae: 30.387 - mean_q: 37.748 Interval 1066 (532500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1473 3 episodes - episode_reward: -173.827 [-581.296, 169.238] - loss: 8.886 - mae: 30.564 - mean_q: 38.064 Interval 1067 (533000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0366 Interval 1068 (533500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3413 2 episodes - episode_reward: -131.750 [-384.022, 120.522] - loss: 8.681 - mae: 30.893 - mean_q: 38.492 Interval 1069 (534000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -0.7948 1 episodes - episode_reward: -262.139 [-262.139, -262.139] - loss: 8.790 - mae: 31.556 - mean_q: 39.236 Interval 1070 (534500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3057 3 episodes - episode_reward: -227.233 [-284.802, -182.023] - loss: 8.998 - mae: 31.966 - mean_q: 40.006 Interval 1071 (535000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9718 5 episodes - episode_reward: -194.772 [-312.614, -118.843] - loss: 8.631 - mae: 32.244 - mean_q: 39.947 Interval 1072 (535500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3724 1 episodes - episode_reward: -274.885 [-274.885, -274.885] - loss: 9.074 - mae: 32.351 - mean_q: 39.784 Interval 1073 (536000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2864 1 episodes - episode_reward: 196.700 [196.700, 196.700] - loss: 9.421 - mae: 32.854 - mean_q: 40.658 Interval 1074 (536500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2680 2 episodes - episode_reward: -90.210 [-111.784, -68.637] - loss: 8.433 - mae: 32.916 - mean_q: 40.752 Interval 1075 (537000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7421 2 episodes - episode_reward: -193.572 [-297.547, -89.597] - loss: 8.740 - mae: 32.521 - mean_q: 40.238 Interval 1076 (537500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8645 8 episodes - episode_reward: -170.846 [-248.821, -106.713] - loss: 9.206 - mae: 32.440 - mean_q: 40.158 Interval 1077 (538000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6618 3 episodes - episode_reward: -292.991 [-315.710, -269.815] - loss: 9.838 - mae: 32.032 - mean_q: 39.377 Interval 1078 (538500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 
-0.5854 1 episodes - episode_reward: -35.178 [-35.178, -35.178] - loss: 8.868 - mae: 31.820 - mean_q: 39.101 Interval 1079 (539000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5133 1 episodes - episode_reward: -540.425 [-540.425, -540.425] - loss: 9.697 - mae: 31.288 - mean_q: 38.408 Interval 1080 (539500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9860 2 episodes - episode_reward: -246.214 [-674.385, 181.957] - loss: 10.565 - mae: 30.816 - mean_q: 37.573 Interval 1081 (540000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6206 1 episodes - episode_reward: -305.124 [-305.124, -305.124] - loss: 10.428 - mae: 30.446 - mean_q: 36.739 Interval 1082 (540500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0647 1 episodes - episode_reward: 215.999 [215.999, 215.999] - loss: 8.750 - mae: 30.150 - mean_q: 36.635 Interval 1083 (541000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4352 1 episodes - episode_reward: -449.664 [-449.664, -449.664] - loss: 11.588 - mae: 29.952 - mean_q: 36.398 Interval 1084 (541500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3688 1 episodes - episode_reward: 174.982 [174.982, 174.982] - loss: 10.659 - mae: 30.088 - mean_q: 36.315 Interval 1085 (542000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0224 Interval 1086 (542500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1108 1 episodes - episode_reward: 174.027 [174.027, 174.027] - loss: 9.788 - mae: 29.629 - mean_q: 35.660 Interval 1087 (543000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5965 3 episodes - episode_reward: -91.262 [-252.300, 141.945] - loss: 8.888 - mae: 29.643 - mean_q: 35.621 Interval 1088 (543500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -0.4886 2 episodes - episode_reward: -209.459 [-246.138, -172.780] - loss: 8.591 - mae: 29.428 - mean_q: 34.950 Interval 1089 (544000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0308 Interval 1090 (544500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7281 3 episodes - episode_reward: -60.321 [-205.228, 201.735] - loss: 11.196 - mae: 28.920 - mean_q: 34.148 Interval 1091 (545000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1824 2 episodes - episode_reward: -331.063 [-463.648, -198.478] - loss: 10.260 - mae: 28.818 - mean_q: 34.126 Interval 1092 (545500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5929 2 episodes - episode_reward: -164.488 [-228.976, -100.000] - loss: 10.108 - mae: 28.705 - mean_q: 33.917 Interval 1093 (546000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3128 1 episodes - episode_reward: 159.658 [159.658, 159.658] - loss: 11.694 - mae: 28.491 - mean_q: 33.326 Interval 1094 (546500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0874 Interval 1095 (547000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4184 1 episodes - episode_reward: 180.096 [180.096, 180.096] - loss: 11.089 - mae: 28.981 - mean_q: 34.295 Interval 1096 (547500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0429 Interval 1097 (548000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2220 3 episodes - episode_reward: -24.999 [-171.172, 196.175] - loss: 10.403 - mae: 29.050 - mean_q: 34.164 Interval 1098 (548500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3670 1 episodes - episode_reward: 228.358 [228.358, 228.358] - loss: 9.421 - mae: 29.008 - mean_q: 
33.971 Interval 1099 (549000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1080 1 episodes - episode_reward: -94.694 [-94.694, -94.694] - loss: 11.370 - mae: 28.541 - mean_q: 33.338 Interval 1100 (549500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0942 Interval 1101 (550000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2046 4 episodes - episode_reward: -115.270 [-381.981, 214.067] - loss: 12.512 - mae: 28.116 - mean_q: 32.465 Interval 1102 (550500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1080 3 episodes - episode_reward: -158.774 [-231.179, -17.765] - loss: 10.486 - mae: 27.677 - mean_q: 31.408 Interval 1103 (551000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6963 2 episodes - episode_reward: -216.264 [-332.529, -100.000] - loss: 10.486 - mae: 27.616 - mean_q: 31.510 Interval 1104 (551500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2167 3 episodes - episode_reward: -188.280 [-248.234, -155.798] - loss: 11.533 - mae: 27.726 - mean_q: 31.267 Interval 1105 (552000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9884 3 episodes - episode_reward: -154.180 [-188.619, -100.000] - loss: 12.376 - mae: 27.563 - mean_q: 30.722 Interval 1106 (552500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4206 3 episodes - episode_reward: -320.534 [-447.303, -199.816] - loss: 12.145 - mae: 27.683 - mean_q: 30.730 Interval 1107 (553000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3818 3 episodes - episode_reward: -195.269 [-442.561, 277.009] - loss: 13.480 - mae: 27.700 - mean_q: 30.339 Interval 1108 (553500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1954 1 episodes - episode_reward: -694.617 
[-694.617, -694.617] - loss: 11.040 - mae: 28.132 - mean_q: 31.117 Interval 1109 (554000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4550 2 episodes - episode_reward: -614.543 [-750.695, -478.392] - loss: 13.310 - mae: 28.100 - mean_q: 30.239 Interval 1110 (554500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6130 2 episodes - episode_reward: -586.662 [-623.997, -549.327] - loss: 12.443 - mae: 28.285 - mean_q: 30.925 Interval 1111 (555000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0676 Interval 1112 (555500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2063 Interval 1113 (556000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1180 Interval 1114 (556500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1795 Interval 1115 (557000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2168 Interval 1116 (557500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1005 Interval 1117 (558000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1860 3 episodes - episode_reward: -313.887 [-502.089, -215.823] - loss: 16.081 - mae: 30.328 - mean_q: 32.923 Interval 1118 (558500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.5005 1 episodes - episode_reward: 162.854 [162.854, 162.854] - loss: 11.942 - mae: 30.547 - mean_q: 33.326 Interval 1119 (559000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6192 1 episodes - episode_reward: -227.123 [-227.123, -227.123] - loss: 13.417 - mae: 30.331 - mean_q: 32.737 Interval 1120 (559500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5715 3 episodes - episode_reward: -95.753 [-257.240, 155.456] - loss: 
13.035 - mae: 30.838 - mean_q: 33.785 Interval 1121 (560000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1415 1 episodes - episode_reward: -60.576 [-60.576, -60.576] - loss: 12.947 - mae: 31.226 - mean_q: 34.146 Interval 1122 (560500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.5029 1 episodes - episode_reward: 191.659 [191.659, 191.659] - loss: 10.072 - mae: 31.450 - mean_q: 34.682 Interval 1123 (561000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9911 5 episodes - episode_reward: -161.925 [-203.340, -100.000] - loss: 15.942 - mae: 31.764 - mean_q: 34.209 Interval 1124 (561500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8958 8 episodes - episode_reward: -197.678 [-293.201, -100.000] - loss: 13.043 - mae: 32.183 - mean_q: 34.749 Interval 1125 (562000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0883 2 episodes - episode_reward: 24.608 [-120.029, 169.246] - loss: 14.434 - mae: 32.163 - mean_q: 34.210 Interval 1126 (562500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9411 3 episodes - episode_reward: -180.413 [-210.546, -134.547] - loss: 13.824 - mae: 32.379 - mean_q: 34.422 Interval 1127 (563000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2960 1 episodes - episode_reward: -117.121 [-117.121, -117.121] - loss: 14.725 - mae: 32.867 - mean_q: 34.914 Interval 1128 (563500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4390 1 episodes - episode_reward: 158.756 [158.756, 158.756] - loss: 13.955 - mae: 33.144 - mean_q: 35.459 Interval 1129 (564000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0191 2 episodes - episode_reward: 11.178 [-219.530, 241.887] - loss: 15.834 - mae: 33.608 - mean_q: 36.046 Interval 1130 (564500 
steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1182 Interval 1131 (565000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3766 1 episodes - episode_reward: 154.473 [154.473, 154.473] - loss: 15.521 - mae: 33.969 - mean_q: 36.778 Interval 1132 (565500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2062 2 episodes - episode_reward: 41.595 [-94.326, 177.516] - loss: 14.881 - mae: 34.694 - mean_q: 37.599 Interval 1133 (566000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2810 1 episodes - episode_reward: -171.929 [-171.929, -171.929] - loss: 13.460 - mae: 34.974 - mean_q: 37.965 Interval 1134 (566500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3906 1 episodes - episode_reward: 206.763 [206.763, 206.763] - loss: 14.089 - mae: 35.213 - mean_q: 38.149 Interval 1135 (567000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1679 Interval 1136 (567500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2038 2 episodes - episode_reward: 28.949 [-136.969, 194.868] - loss: 12.723 - mae: 36.283 - mean_q: 39.380 Interval 1137 (568000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0085 Interval 1138 (568500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9296 3 episodes - episode_reward: -160.634 [-547.136, 153.644] - loss: 15.218 - mae: 36.528 - mean_q: 39.550 Interval 1139 (569000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0167 Interval 1140 (569500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2284 1 episodes - episode_reward: 139.023 [139.023, 139.023] - loss: 16.771 - mae: 36.910 - mean_q: 40.325 Interval 1141 (570000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -0.0378 Interval 1142 (570500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1095 2 episodes - episode_reward: -35.080 [-171.033, 100.873] - loss: 13.817 - mae: 36.580 - mean_q: 40.041 Interval 1143 (571000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0568 Interval 1144 (571500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0471 Interval 1145 (572000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3808 1 episodes - episode_reward: 168.198 [168.198, 168.198] - loss: 15.271 - mae: 37.022 - mean_q: 40.438 Interval 1146 (572500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8066 2 episodes - episode_reward: -178.200 [-231.387, -125.013] - loss: 12.342 - mae: 36.983 - mean_q: 40.420 Interval 1147 (573000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3325 1 episodes - episode_reward: 211.099 [211.099, 211.099] - loss: 15.837 - mae: 37.235 - mean_q: 40.763 Interval 1148 (573500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1424 Interval 1149 (574000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3723 4 episodes - episode_reward: -161.265 [-364.795, 168.809] - loss: 14.703 - mae: 36.834 - mean_q: 39.993 Interval 1150 (574500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3151 1 episodes - episode_reward: 164.590 [164.590, 164.590] - loss: 12.514 - mae: 36.898 - mean_q: 40.500 Interval 1151 (575000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0816 Interval 1152 (575500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2249 1 episodes - episode_reward: 95.325 [95.325, 95.325] - loss: 12.161 - mae: 36.126 - mean_q: 
39.281 Interval 1153 (576000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3124 3 episodes - episode_reward: -544.622 [-1144.442, -104.937] - loss: 14.058 - mae: 35.890 - mean_q: 38.478 Interval 1154 (576500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2063 2 episodes - episode_reward: 30.550 [-109.584, 170.684] - loss: 15.988 - mae: 35.901 - mean_q: 38.779 Interval 1155 (577000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1094 Interval 1156 (577500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0826 Interval 1157 (578000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6859 3 episodes - episode_reward: -398.968 [-898.935, 215.057] - loss: 11.919 - mae: 35.391 - mean_q: 39.351 Interval 1158 (578500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1981 Interval 1159 (579000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2533 1 episodes - episode_reward: 168.014 [168.014, 168.014] - loss: 13.261 - mae: 35.315 - mean_q: 38.959 Interval 1160 (579500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0889 2 episodes - episode_reward: 2.412 [-145.403, 150.226] - loss: 13.839 - mae: 35.251 - mean_q: 38.817 Interval 1161 (580000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2551 Interval 1162 (580500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2066 2 episodes - episode_reward: -13.009 [-237.044, 211.025] - loss: 14.621 - mae: 35.601 - mean_q: 39.392 Interval 1163 (581000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3288 1 episodes - episode_reward: -124.020 [-124.020, -124.020] - loss: 13.130 - mae: 35.157 - mean_q: 38.506 Interval 1164 (581500 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -0.0097 Interval 1165 (582000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4953 4 episodes - episode_reward: -176.752 [-294.045, 133.724] - loss: 12.788 - mae: 35.277 - mean_q: 38.751 Interval 1166 (582500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0657 1 episodes - episode_reward: -185.224 [-185.224, -185.224] - loss: 13.980 - mae: 35.239 - mean_q: 39.206 Interval 1167 (583000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4244 1 episodes - episode_reward: 190.008 [190.008, 190.008] - loss: 11.871 - mae: 35.040 - mean_q: 39.062 Interval 1168 (583500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1343 3 episodes - episode_reward: -148.802 [-337.894, 228.193] - loss: 14.694 - mae: 35.043 - mean_q: 38.311 Interval 1169 (584000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3222 1 episodes - episode_reward: 154.822 [154.822, 154.822] - loss: 11.531 - mae: 35.053 - mean_q: 38.627 Interval 1170 (584500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4073 1 episodes - episode_reward: 182.154 [182.154, 182.154] - loss: 11.810 - mae: 34.881 - mean_q: 38.398 Interval 1171 (585000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1542 Interval 1172 (585500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7004 4 episodes - episode_reward: -198.811 [-433.253, 198.781] - loss: 11.730 - mae: 35.436 - mean_q: 39.830 Interval 1173 (586000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3899 1 episodes - episode_reward: 228.754 [228.754, 228.754] - loss: 12.753 - mae: 35.104 - mean_q: 38.796 Interval 1174 (586500 steps performed) 500/500 [==============================] - 2s 5ms/step - 
reward: 0.2573 1 episodes - episode_reward: 138.309 [138.309, 138.309] - loss: 12.329 - mae: 35.351 - mean_q: 39.447 Interval 1175 (587000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0880 3 episodes - episode_reward: -377.369 [-631.409, -185.192] - loss: 12.530 - mae: 35.364 - mean_q: 39.319 Interval 1176 (587500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4057 2 episodes - episode_reward: -49.232 [-329.219, 230.755] - loss: 11.736 - mae: 35.153 - mean_q: 39.004 Interval 1177 (588000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0396 Interval 1178 (588500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3198 5 episodes - episode_reward: -142.558 [-449.095, 131.745] - loss: 11.204 - mae: 35.579 - mean_q: 40.084 Interval 1179 (589000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6564 3 episodes - episode_reward: -109.776 [-382.928, 205.981] - loss: 12.121 - mae: 35.619 - mean_q: 39.816 Interval 1180 (589500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0712 Interval 1181 (590000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3574 1 episodes - episode_reward: 197.319 [197.319, 197.319] - loss: 12.065 - mae: 35.716 - mean_q: 40.107 Interval 1182 (590500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1845 2 episodes - episode_reward: -10.633 [-157.415, 136.148] - loss: 11.667 - mae: 35.354 - mean_q: 39.150 Interval 1183 (591000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1448 Interval 1184 (591500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6232 2 episodes - episode_reward: -177.567 [-529.160, 174.025] - loss: 12.890 - mae: 35.004 - mean_q: 39.051 Interval 1185 (592000 steps performed) 
500/500 [==============================] - 2s 5ms/step - reward: 0.3600 1 episodes - episode_reward: 267.720 [267.720, 267.720] - loss: 11.794 - mae: 35.494 - mean_q: 39.920 Interval 1186 (592500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8197 2 episodes - episode_reward: -214.897 [-628.683, 198.889] - loss: 12.807 - mae: 35.839 - mean_q: 40.012 Interval 1187 (593000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0587 2 episodes - episode_reward: 43.247 [-152.532, 239.027] - loss: 13.813 - mae: 35.621 - mean_q: 40.211 Interval 1188 (593500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3169 2 episodes - episode_reward: -96.355 [-311.035, 118.326] - loss: 12.265 - mae: 35.886 - mean_q: 40.160 Interval 1189 (594000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3707 1 episodes - episode_reward: 223.243 [223.243, 223.243] - loss: 12.706 - mae: 35.943 - mean_q: 40.272 Interval 1190 (594500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1410 Interval 1191 (595000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2520 1 episodes - episode_reward: 183.595 [183.595, 183.595] - loss: 11.943 - mae: 35.307 - mean_q: 39.442 Interval 1192 (595500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.5416 1 episodes - episode_reward: 224.789 [224.789, 224.789] - loss: 9.342 - mae: 35.543 - mean_q: 40.433 Interval 1193 (596000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1312 Interval 1194 (596500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0716 3 episodes - episode_reward: -170.507 [-434.048, 217.418] - loss: 12.647 - mae: 35.402 - mean_q: 40.273 Interval 1195 (597000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 
-0.1405 2 episodes - episode_reward: -26.069 [-278.511, 226.373] - loss: 10.961 - mae: 35.739 - mean_q: 40.250 Interval 1196 (597500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1106 4 episodes - episode_reward: -102.966 [-227.851, 224.133] - loss: 11.183 - mae: 35.472 - mean_q: 39.674 Interval 1197 (598000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1273 1 episodes - episode_reward: -203.500 [-203.500, -203.500] - loss: 11.787 - mae: 35.165 - mean_q: 38.572 Interval 1198 (598500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0492 2 episodes - episode_reward: 7.143 [-156.183, 170.468] - loss: 10.868 - mae: 34.972 - mean_q: 38.884 Interval 1199 (599000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2867 1 episodes - episode_reward: 130.467 [130.467, 130.467] - loss: 12.496 - mae: 34.851 - mean_q: 38.504 Interval 1200 (599500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1706 1 episodes - episode_reward: 165.121 [165.121, 165.121] - loss: 9.726 - mae: 34.923 - mean_q: 38.895 Interval 1201 (600000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2015 Interval 1202 (600500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2094 2 episodes - episode_reward: 66.987 [-32.532, 166.506] - loss: 9.868 - mae: 35.002 - mean_q: 39.617 Interval 1203 (601000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0160 2 episodes - episode_reward: 26.101 [-190.937, 243.140] - loss: 13.071 - mae: 35.052 - mean_q: 39.760 Interval 1204 (601500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5036 6 episodes - episode_reward: -127.269 [-202.815, -8.391] - loss: 9.396 - mae: 35.235 - mean_q: 39.798 Interval 1205 (602000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: 0.2089 Interval 1206 (602500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4479 5 episodes - episode_reward: -119.773 [-286.100, 209.314] - loss: 11.036 - mae: 35.416 - mean_q: 40.434 Interval 1207 (603000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3946 1 episodes - episode_reward: 168.863 [168.863, 168.863] - loss: 12.130 - mae: 35.524 - mean_q: 40.761 Interval 1208 (603500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1958 2 episodes - episode_reward: 51.044 [-100.000, 202.088] - loss: 11.652 - mae: 35.474 - mean_q: 40.719 Interval 1209 (604000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5780 2 episodes - episode_reward: -115.243 [-149.385, -81.101] - loss: 9.890 - mae: 35.512 - mean_q: 40.782 Interval 1210 (604500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7091 2 episodes - episode_reward: -185.888 [-245.970, -125.806] - loss: 11.626 - mae: 35.398 - mean_q: 40.879 Interval 1211 (605000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9035 6 episodes - episode_reward: -232.805 [-290.713, -192.741] - loss: 11.676 - mae: 35.328 - mean_q: 40.341 Interval 1212 (605500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9484 4 episodes - episode_reward: -155.124 [-239.978, -65.374] - loss: 11.564 - mae: 35.296 - mean_q: 39.923 Interval 1213 (606000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1154 1 episodes - episode_reward: 137.546 [137.546, 137.546] - loss: 10.921 - mae: 35.625 - mean_q: 39.977 Interval 1214 (606500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1462 Interval 1215 (607000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 
-0.3674 3 episodes - episode_reward: -85.676 [-214.684, 151.836] - loss: 9.810 - mae: 35.902 - mean_q: 40.524 Interval 1216 (607500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4516 1 episodes - episode_reward: 267.244 [267.244, 267.244] - loss: 11.025 - mae: 35.815 - mean_q: 39.948 Interval 1217 (608000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7590 3 episodes - episode_reward: -56.832 [-236.700, 205.346] - loss: 9.332 - mae: 35.953 - mean_q: 40.179 Interval 1218 (608500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2517 3 episodes - episode_reward: -248.363 [-327.426, -137.550] - loss: 13.152 - mae: 35.874 - mean_q: 39.313 Interval 1219 (609000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5239 2 episodes - episode_reward: -136.776 [-423.919, 150.368] - loss: 12.250 - mae: 35.621 - mean_q: 38.991 Interval 1220 (609500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6923 1 episodes - episode_reward: -312.758 [-312.758, -312.758] - loss: 10.715 - mae: 35.515 - mean_q: 39.319 Interval 1221 (610000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1835 2 episodes - episode_reward: -48.260 [-219.064, 122.545] - loss: 10.638 - mae: 35.301 - mean_q: 38.428 Interval 1222 (610500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4761 3 episodes - episode_reward: -60.773 [-336.928, 259.457] - loss: 11.129 - mae: 35.146 - mean_q: 37.916 Interval 1223 (611000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3169 3 episodes - episode_reward: -205.878 [-350.177, 22.850] - loss: 12.175 - mae: 34.963 - mean_q: 38.023 Interval 1224 (611500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8298 1 episodes - episode_reward: -397.507 [-397.507, 
-397.507] - loss: 10.886 - mae: 34.815 - mean_q: 38.200 Interval 1225 (612000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8300 4 episodes - episode_reward: -164.544 [-526.538, 190.319] - loss: 10.983 - mae: 34.797 - mean_q: 38.379 Interval 1226 (612500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4443 1 episodes - episode_reward: -996.331 [-996.331, -996.331] - loss: 13.209 - mae: 34.671 - mean_q: 37.709 Interval 1227 (613000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0323 2 episodes - episode_reward: 8.785 [-130.862, 148.432] - loss: 10.393 - mae: 34.662 - mean_q: 37.310 Interval 1228 (613500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0532 2 episodes - episode_reward: -106.543 [-392.116, 179.029] - loss: 9.037 - mae: 34.762 - mean_q: 37.529 Interval 1229 (614000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4352 5 episodes - episode_reward: -309.243 [-457.863, -135.740] - loss: 12.218 - mae: 34.433 - mean_q: 36.686 Interval 1230 (614500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0060 Interval 1231 (615000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2548 1 episodes - episode_reward: 179.454 [179.454, 179.454] - loss: 9.434 - mae: 34.094 - mean_q: 36.579 Interval 1232 (615500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3780 2 episodes - episode_reward: -101.444 [-113.084, -89.804] - loss: 12.440 - mae: 33.968 - mean_q: 35.547 Interval 1233 (616000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2389 1 episodes - episode_reward: 96.852 [96.852, 96.852] - loss: 11.845 - mae: 33.885 - mean_q: 35.877 Interval 1234 (616500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1427 5 
episodes - episode_reward: -260.942 [-384.465, -116.355] - loss: 10.123 - mae: 33.606 - mean_q: 34.670 Interval 1235 (617000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0855 1 episodes - episode_reward: 180.612 [180.612, 180.612] - loss: 9.953 - mae: 33.557 - mean_q: 34.660 Interval 1236 (617500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1860 1 episodes - episode_reward: -336.268 [-336.268, -336.268] - loss: 11.677 - mae: 33.586 - mean_q: 34.493 Interval 1237 (618000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3487 1 episodes - episode_reward: 207.675 [207.675, 207.675] - loss: 11.341 - mae: 33.609 - mean_q: 34.538 Interval 1238 (618500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2325 2 episodes - episode_reward: -65.884 [-311.220, 179.452] - loss: 12.028 - mae: 33.800 - mean_q: 34.518 Interval 1239 (619000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0097 Interval 1240 (619500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2219 1 episodes - episode_reward: 156.014 [156.014, 156.014] - loss: 11.976 - mae: 33.975 - mean_q: 35.008 Interval 1241 (620000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2914 2 episodes - episode_reward: -68.127 [-227.888, 91.635] - loss: 9.607 - mae: 34.037 - mean_q: 34.894 Interval 1242 (620500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1139 Interval 1243 (621000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4642 6 episodes - episode_reward: -112.557 [-223.922, 159.253] - loss: 11.969 - mae: 34.316 - mean_q: 35.154 Interval 1244 (621500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4098 1 episodes - episode_reward: 172.189 [172.189, 172.189] - 
loss: 12.348 - mae: 33.987 - mean_q: 34.198 Interval 1245 (622000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1849 2 episodes - episode_reward: 42.911 [-100.000, 185.822] - loss: 10.435 - mae: 34.208 - mean_q: 34.745 Interval 1246 (622500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4068 4 episodes - episode_reward: -233.636 [-290.732, -196.278] - loss: 11.491 - mae: 34.290 - mean_q: 34.707 Interval 1247 (623000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6983 7 episodes - episode_reward: -166.237 [-227.364, -2.555] - loss: 11.486 - mae: 33.978 - mean_q: 33.696 Interval 1248 (623500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3294 4 episodes - episode_reward: -188.685 [-282.548, -146.315] - loss: 10.469 - mae: 34.161 - mean_q: 33.800 Interval 1249 (624000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3266 2 episodes - episode_reward: -94.079 [-176.564, -11.593] - loss: 12.575 - mae: 34.468 - mean_q: 33.789 Interval 1250 (624500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2363 2 episodes - episode_reward: -77.721 [-93.202, -62.240] - loss: 11.354 - mae: 34.438 - mean_q: 34.008 Interval 1251 (625000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8413 8 episodes - episode_reward: -167.777 [-295.837, -49.362] - loss: 10.482 - mae: 34.505 - mean_q: 33.728 Interval 1252 (625500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1646 Interval 1253 (626000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2396 5 episodes - episode_reward: -222.129 [-414.485, -27.501] - loss: 12.860 - mae: 34.697 - mean_q: 33.772 Interval 1254 (626500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0577 Interval 
1255 (627000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0481 1 episodes - episode_reward: 177.349 [177.349, 177.349] - loss: 10.654 - mae: 35.004 - mean_q: 34.163 Interval 1256 (627500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8869 3 episodes - episode_reward: -320.963 [-390.465, -275.099] - loss: 12.278 - mae: 35.132 - mean_q: 33.943 Interval 1257 (628000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4899 1 episodes - episode_reward: -418.047 [-418.047, -418.047] - loss: 13.947 - mae: 35.350 - mean_q: 33.511 Interval 1258 (628500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4638 1 episodes - episode_reward: 169.042 [169.042, 169.042] - loss: 13.509 - mae: 35.833 - mean_q: 34.236 Interval 1259 (629000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1182 Interval 1260 (629500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1592 Interval 1261 (630000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1615 Interval 1262 (630500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1803 Interval 1263 (631000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0838 3 episodes - episode_reward: -265.481 [-644.217, -52.227] - loss: 11.909 - mae: 37.661 - mean_q: 36.826 Interval 1264 (631500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7567 2 episodes - episode_reward: -186.708 [-239.094, -134.322] - loss: 14.038 - mae: 38.017 - mean_q: 37.449 Interval 1265 (632000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2467 1 episodes - episode_reward: 186.197 [186.197, 186.197] - loss: 13.099 - mae: 38.331 - mean_q: 37.827 Interval 1266 (632500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: 0.0452 Interval 1267 (633000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4218 2 episodes - episode_reward: -112.766 [-377.452, 151.921] - loss: 12.245 - mae: 39.211 - mean_q: 39.188 Interval 1268 (633500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0793 Interval 1269 (634000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6645 2 episodes - episode_reward: -643.544 [-1472.442, 185.354] - loss: 13.720 - mae: 39.369 - mean_q: 38.686 Interval 1270 (634500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1954 3 episodes - episode_reward: -194.674 [-386.364, 169.896] - loss: 15.168 - mae: 39.382 - mean_q: 38.848 Interval 1271 (635000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1118 Interval 1272 (635500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2325 1 episodes - episode_reward: 183.270 [183.270, 183.270] - loss: 13.407 - mae: 39.687 - mean_q: 39.088 Interval 1273 (636000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3354 1 episodes - episode_reward: 140.865 [140.865, 140.865] - loss: 13.642 - mae: 40.061 - mean_q: 39.609 Interval 1274 (636500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2340 2 episodes - episode_reward: -69.815 [-296.789, 157.158] - loss: 11.746 - mae: 40.265 - mean_q: 40.085 Interval 1275 (637000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0072 Interval 1276 (637500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0468 Interval 1277 (638000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7271 2 episodes - episode_reward: -94.009 [-329.302, 141.284] - loss: 13.064 - mae: 40.224 - 
mean_q: 40.692 Interval 1278 (638500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5898 2 episodes - episode_reward: -745.076 [-950.182, -539.971] - loss: 12.844 - mae: 40.051 - mean_q: 39.456 Interval 1279 (639000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6963 5 episodes - episode_reward: -364.019 [-1046.558, 195.796] - loss: 11.894 - mae: 39.970 - mean_q: 38.794 Interval 1280 (639500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2187 Interval 1281 (640000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8882 5 episodes - episode_reward: -264.994 [-434.441, 247.017] - loss: 13.511 - mae: 40.023 - mean_q: 39.254 Interval 1282 (640500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4228 2 episodes - episode_reward: -344.301 [-876.989, 188.387] - loss: 12.374 - mae: 40.079 - mean_q: 38.987 Interval 1283 (641000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9223 3 episodes - episode_reward: -670.004 [-891.363, -444.862] - loss: 11.969 - mae: 40.084 - mean_q: 38.518 Interval 1284 (641500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0825 Interval 1285 (642000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2205 2 episodes - episode_reward: -87.444 [-288.770, 113.883] - loss: 13.269 - mae: 39.967 - mean_q: 37.789 Interval 1286 (642500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5791 2 episodes - episode_reward: -120.587 [-429.523, 188.349] - loss: 12.930 - mae: 39.999 - mean_q: 37.497 Interval 1287 (643000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0670 Interval 1288 (643500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1032 3 episodes - 
episode_reward: -172.774 [-356.433, 187.881] - loss: 14.354 - mae: 39.882 - mean_q: 37.733 Interval 1289 (644000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0797 Interval 1290 (644500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3239 1 episodes - episode_reward: 178.058 [178.058, 178.058] - loss: 12.787 - mae: 40.202 - mean_q: 37.715 Interval 1291 (645000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9196 1 episodes - episode_reward: -490.120 [-490.120, -490.120] - loss: 13.881 - mae: 40.113 - mean_q: 36.877 Interval 1292 (645500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1562 Interval 1293 (646000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2173 1 episodes - episode_reward: 92.507 [92.507, 92.507] - loss: 11.729 - mae: 39.794 - mean_q: 36.094 Interval 1294 (646500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0891 Interval 1295 (647000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2448 1 episodes - episode_reward: 174.519 [174.519, 174.519] - loss: 12.933 - mae: 40.257 - mean_q: 37.333 Interval 1296 (647500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2468 Interval 1297 (648000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2834 1 episodes - episode_reward: 237.740 [237.740, 237.740] - loss: 14.661 - mae: 39.920 - mean_q: 37.040 Interval 1298 (648500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1435 Interval 1299 (649000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0362 Interval 1300 (649500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4893 3 episodes - episode_reward: -112.361 [-286.727, 49.644] - 
loss: 12.093 - mae: 39.603 - mean_q: 36.295 Interval 1301 (650000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0459 4 episodes - episode_reward: -434.331 [-519.353, -269.652] - loss: 13.540 - mae: 39.679 - mean_q: 36.552 Interval 1302 (650500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6539 2 episodes - episode_reward: -1052.140 [-1437.522, -666.757] - loss: 14.479 - mae: 39.804 - mean_q: 36.358 Interval 1303 (651000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0199 Interval 1304 (651500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0368 2 episodes - episode_reward: -273.779 [-654.012, 106.454] - loss: 11.646 - mae: 39.839 - mean_q: 35.729 Interval 1305 (652000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1014 Interval 1306 (652500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3800 2 episodes - episode_reward: -44.741 [-296.893, 207.411] - loss: 13.842 - mae: 39.845 - mean_q: 36.572 Interval 1307 (653000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1201 Interval 1308 (653500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4535 2 episodes - episode_reward: -190.854 [-470.842, 89.134] - loss: 12.968 - mae: 39.358 - mean_q: 35.751 Interval 1309 (654000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2149 1 episodes - episode_reward: 206.700 [206.700, 206.700] - loss: 14.070 - mae: 39.073 - mean_q: 35.976 Interval 1310 (654500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1284 Interval 1311 (655000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4817 2 episodes - episode_reward: -107.614 [-190.515, -24.712] - loss: 12.084 - mae: 39.516 - mean_q: 
37.351 Interval 1312 (655500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4183 1 episodes - episode_reward: 155.302 [155.302, 155.302] - loss: 15.294 - mae: 39.529 - mean_q: 37.142 Interval 1313 (656000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4118 1 episodes - episode_reward: 227.521 [227.521, 227.521] - loss: 11.010 - mae: 39.468 - mean_q: 37.213 Interval 1314 (656500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4385 1 episodes - episode_reward: 221.884 [221.884, 221.884] - loss: 11.601 - mae: 39.650 - mean_q: 37.362 Interval 1315 (657000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1355 2 episodes - episode_reward: 25.838 [-139.663, 191.339] - loss: 14.402 - mae: 39.666 - mean_q: 37.363 Interval 1316 (657500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2493 Interval 1317 (658000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0389 2 episodes - episode_reward: -35.267 [-246.356, 175.822] - loss: 13.022 - mae: 39.644 - mean_q: 37.952 Interval 1318 (658500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0145 Interval 1319 (659000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0108 2 episodes - episode_reward: 39.629 [-152.969, 232.226] - loss: 12.706 - mae: 39.594 - mean_q: 38.420 Interval 1320 (659500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3193 1 episodes - episode_reward: 183.629 [183.629, 183.629] - loss: 11.224 - mae: 39.723 - mean_q: 38.924 Interval 1321 (660000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1445 Interval 1322 (660500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2675 Interval 1323 (661000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.1483 Interval 1324 (661500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4416 5 episodes - episode_reward: -169.772 [-246.720, 11.301] - loss: 10.969 - mae: 39.290 - mean_q: 39.353 Interval 1325 (662000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0756 Interval 1326 (662500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0016 2 episodes - episode_reward: -7.321 [-189.240, 174.598] - loss: 12.065 - mae: 38.912 - mean_q: 38.877 Interval 1327 (663000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0881 Interval 1328 (663500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0185 Interval 1329 (664000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2986 1 episodes - episode_reward: 132.672 [132.672, 132.672] - loss: 10.683 - mae: 38.548 - mean_q: 38.240 Interval 1330 (664500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3800 2 episodes - episode_reward: -73.299 [-307.305, 160.707] - loss: 10.898 - mae: 38.466 - mean_q: 38.604 Interval 1331 (665000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3982 1 episodes - episode_reward: -269.291 [-269.291, -269.291] - loss: 9.795 - mae: 38.847 - mean_q: 38.274 Interval 1332 (665500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6467 2 episodes - episode_reward: -138.314 [-482.739, 206.112] - loss: 12.578 - mae: 38.708 - mean_q: 38.737 Interval 1333 (666000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0386 1 episodes - episode_reward: 178.938 [178.938, 178.938] - loss: 11.609 - mae: 38.682 - mean_q: 38.857 Interval 1334 (666500 steps performed) 500/500 [==============================] - 2s 4ms/step - 
reward: -1.4543 2 episodes - episode_reward: -465.384 [-762.185, -168.583] - loss: 9.690 - mae: 38.436 - mean_q: 38.378 Interval 1335 (667000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9950 3 episodes - episode_reward: -280.693 [-841.803, 207.833] - loss: 8.298 - mae: 38.897 - mean_q: 39.605 Interval 1336 (667500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0717 1 episodes - episode_reward: -1174.501 [-1174.501, -1174.501] - loss: 9.919 - mae: 38.960 - mean_q: 38.033 Interval 1337 (668000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2102 3 episodes - episode_reward: -357.935 [-832.877, 194.349] - loss: 9.793 - mae: 39.352 - mean_q: 38.588 Interval 1338 (668500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1823 Interval 1339 (669000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4192 1 episodes - episode_reward: 216.428 [216.428, 216.428] - loss: 8.826 - mae: 38.906 - mean_q: 38.073 Interval 1340 (669500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1318 Interval 1341 (670000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1118 1 episodes - episode_reward: -48.436 [-48.436, -48.436] - loss: 9.701 - mae: 38.677 - mean_q: 37.352 Interval 1342 (670500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3513 1 episodes - episode_reward: 161.357 [161.357, 161.357] - loss: 8.768 - mae: 38.424 - mean_q: 38.088 Interval 1343 (671000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2244 1 episodes - episode_reward: 155.058 [155.058, 155.058] - loss: 9.080 - mae: 38.441 - mean_q: 38.000 Interval 1344 (671500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.4348 7 episodes - episode_reward: -386.541 
[-1004.495, -143.649] - loss: 9.592 - mae: 38.427 - mean_q: 37.453 Interval 1345 (672000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1364 Interval 1346 (672500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2942 3 episodes - episode_reward: -181.979 [-411.657, 194.777] - loss: 7.851 - mae: 38.412 - mean_q: 37.368 Interval 1347 (673000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1694 1 episodes - episode_reward: -1171.090 [-1171.090, -1171.090] - loss: 10.417 - mae: 38.088 - mean_q: 36.842 Interval 1348 (673500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4169 2 episodes - episode_reward: -56.134 [-287.419, 175.150] - loss: 8.151 - mae: 37.829 - mean_q: 36.832 Interval 1349 (674000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1690 Interval 1350 (674500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3908 1 episodes - episode_reward: 170.400 [170.400, 170.400] - loss: 8.155 - mae: 37.588 - mean_q: 36.369 Interval 1351 (675000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4124 1 episodes - episode_reward: -186.057 [-186.057, -186.057] - loss: 7.982 - mae: 37.302 - mean_q: 36.827 Interval 1352 (675500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0297 Interval 1353 (676000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4099 3 episodes - episode_reward: -62.733 [-217.370, 145.643] - loss: 6.375 - mae: 37.011 - mean_q: 37.199 Interval 1354 (676500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3112 1 episodes - episode_reward: -129.941 [-129.941, -129.941] - loss: 10.410 - mae: 37.174 - mean_q: 36.699 Interval 1355 (677000 steps performed) 500/500 [==============================] - 2s 
5ms/step - reward: 0.3825 1 episodes - episode_reward: 176.112 [176.112, 176.112] - loss: 7.072 - mae: 36.702 - mean_q: 37.020 Interval 1356 (677500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6578 3 episodes - episode_reward: -109.995 [-169.683, -68.047] - loss: 8.151 - mae: 36.896 - mean_q: 37.884 Interval 1357 (678000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2385 1 episodes - episode_reward: -135.216 [-135.216, -135.216] - loss: 8.348 - mae: 36.963 - mean_q: 37.629 Interval 1358 (678500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0211 Interval 1359 (679000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3250 3 episodes - episode_reward: -53.715 [-177.207, 128.168] - loss: 9.503 - mae: 36.843 - mean_q: 37.540 Interval 1360 (679500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0133 2 episodes - episode_reward: -4.389 [-150.418, 141.640] - loss: 8.733 - mae: 37.067 - mean_q: 37.354 Interval 1361 (680000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3731 1 episodes - episode_reward: 187.301 [187.301, 187.301] - loss: 7.505 - mae: 37.141 - mean_q: 37.755 Interval 1362 (680500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2385 1 episodes - episode_reward: 189.024 [189.024, 189.024] - loss: 9.251 - mae: 37.401 - mean_q: 37.952 Interval 1363 (681000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8998 4 episodes - episode_reward: -217.133 [-277.697, -166.290] - loss: 8.848 - mae: 37.338 - mean_q: 37.965 Interval 1364 (681500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4993 3 episodes - episode_reward: -134.743 [-160.047, -100.000] - loss: 7.984 - mae: 37.174 - mean_q: 36.546 Interval 1365 (682000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.0191 Interval 1366 (682500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0742 2 episodes - episode_reward: 21.791 [-89.179, 132.761] - loss: 8.994 - mae: 37.313 - mean_q: 37.363 Interval 1367 (683000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3676 1 episodes - episode_reward: 180.815 [180.815, 180.815] - loss: 10.330 - mae: 37.252 - mean_q: 37.103 Interval 1368 (683500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0984 Interval 1369 (684000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1530 1 episodes - episode_reward: 187.085 [187.085, 187.085] - loss: 9.502 - mae: 37.214 - mean_q: 37.297 Interval 1370 (684500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1838 Interval 1371 (685000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2984 1 episodes - episode_reward: 161.832 [161.832, 161.832] - loss: 8.555 - mae: 37.307 - mean_q: 38.133 Interval 1372 (685500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1484 2 episodes - episode_reward: 48.461 [-100.000, 196.923] - loss: 7.294 - mae: 37.578 - mean_q: 38.193 Interval 1373 (686000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1503 3 episodes - episode_reward: -40.249 [-179.153, 158.407] - loss: 7.864 - mae: 37.474 - mean_q: 37.970 Interval 1374 (686500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0660 Interval 1375 (687000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3649 1 episodes - episode_reward: 205.512 [205.512, 205.512] - loss: 6.814 - mae: 37.778 - mean_q: 38.677 Interval 1376 (687500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5735 4 
episodes - episode_reward: -170.860 [-233.711, -100.000] - loss: 7.530 - mae: 37.734 - mean_q: 38.676 Interval 1377 (688000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3424 1 episodes - episode_reward: -287.374 [-287.374, -287.374] - loss: 9.862 - mae: 37.307 - mean_q: 37.951 Interval 1378 (688500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2767 1 episodes - episode_reward: 195.359 [195.359, 195.359] - loss: 8.176 - mae: 37.165 - mean_q: 37.946 Interval 1379 (689000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3641 1 episodes - episode_reward: 166.327 [166.327, 166.327] - loss: 9.249 - mae: 37.124 - mean_q: 38.245 Interval 1380 (689500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1126 3 episodes - episode_reward: -4.855 [-180.134, 176.833] - loss: 6.929 - mae: 36.811 - mean_q: 37.832 Interval 1381 (690000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0964 1 episodes - episode_reward: -109.537 [-109.537, -109.537] - loss: 8.750 - mae: 37.125 - mean_q: 38.009 Interval 1382 (690500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2033 2 episodes - episode_reward: 49.694 [-75.870, 175.259] - loss: 8.840 - mae: 37.233 - mean_q: 38.349 Interval 1383 (691000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3488 1 episodes - episode_reward: 190.744 [190.744, 190.744] - loss: 8.997 - mae: 37.291 - mean_q: 39.151 Interval 1384 (691500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0652 2 episodes - episode_reward: 12.579 [-161.981, 187.140] - loss: 9.354 - mae: 37.389 - mean_q: 38.711 Interval 1385 (692000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1624 Interval 1386 (692500 steps performed) 500/500 [==============================] - 
2s 4ms/step - reward: -0.7100 4 episodes - episode_reward: -89.617 [-218.239, 188.509] - loss: 8.246 - mae: 37.350 - mean_q: 38.736 Interval 1387 (693000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1777 1 episodes - episode_reward: 185.496 [185.496, 185.496] - loss: 8.030 - mae: 37.257 - mean_q: 38.539 Interval 1388 (693500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3240 1 episodes - episode_reward: -213.564 [-213.564, -213.564] - loss: 5.897 - mae: 36.993 - mean_q: 38.259 Interval 1389 (694000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1068 1 episodes - episode_reward: 167.601 [167.601, 167.601] - loss: 7.671 - mae: 36.933 - mean_q: 38.394 Interval 1390 (694500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5055 2 episodes - episode_reward: -160.811 [-162.590, -159.032] - loss: 7.980 - mae: 36.951 - mean_q: 38.424 Interval 1391 (695000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0364 Interval 1392 (695500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1566 2 episodes - episode_reward: -12.739 [-100.000, 74.522] - loss: 7.585 - mae: 36.900 - mean_q: 37.719 Interval 1393 (696000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0316 Interval 1394 (696500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3711 1 episodes - episode_reward: 190.249 [190.249, 190.249] - loss: 9.248 - mae: 36.659 - mean_q: 37.657 Interval 1395 (697000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9944 5 episodes - episode_reward: -74.582 [-232.404, 188.938] - loss: 7.428 - mae: 36.809 - mean_q: 37.885 Interval 1396 (697500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1274 1 episodes - episode_reward: -141.812 
[-141.812, -141.812] - loss: 9.525 - mae: 36.922 - mean_q: 37.688 Interval 1397 (698000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2978 1 episodes - episode_reward: -177.730 [-177.730, -177.730] - loss: 8.945 - mae: 36.981 - mean_q: 37.325 Interval 1398 (698500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2562 7 episodes - episode_reward: -147.707 [-263.953, -48.356] - loss: 9.785 - mae: 36.967 - mean_q: 37.073 Interval 1399 (699000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4777 2 episodes - episode_reward: -181.398 [-209.578, -153.218] - loss: 8.259 - mae: 37.122 - mean_q: 36.483 Interval 1400 (699500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4226 1 episodes - episode_reward: 209.039 [209.039, 209.039] - loss: 9.080 - mae: 36.928 - mean_q: 36.836 Interval 1401 (700000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2512 2 episodes - episode_reward: 46.947 [-100.000, 193.894] - loss: 10.673 - mae: 36.850 - mean_q: 36.598 Interval 1402 (700500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.5279 1 episodes - episode_reward: 261.160 [261.160, 261.160] - loss: 7.548 - mae: 36.912 - mean_q: 37.307 Interval 1403 (701000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3147 5 episodes - episode_reward: -103.260 [-230.113, 252.351] - loss: 7.571 - mae: 36.935 - mean_q: 37.759 Interval 1404 (701500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1878 Interval 1405 (702000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3975 1 episodes - episode_reward: 183.207 [183.207, 183.207] - loss: 8.529 - mae: 36.694 - mean_q: 36.716 Interval 1406 (702500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3259 
1 episodes - episode_reward: 243.037 [243.037, 243.037] - loss: 8.628 - mae: 36.975 - mean_q: 37.047 Interval 1407 (703000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1161 Interval 1408 (703500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2072 2 episodes - episode_reward: -13.767 [-150.572, 123.038] - loss: 8.788 - mae: 36.860 - mean_q: 36.781 Interval 1409 (704000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1684 Interval 1410 (704500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9152 3 episodes - episode_reward: -137.524 [-297.453, 106.784] - loss: 6.645 - mae: 37.030 - mean_q: 37.271 Interval 1411 (705000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1948 Interval 1412 (705500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2343 1 episodes - episode_reward: 216.429 [216.429, 216.429] - loss: 7.824 - mae: 37.409 - mean_q: 36.844 Interval 1413 (706000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3116 1 episodes - episode_reward: 147.147 [147.147, 147.147] - loss: 8.302 - mae: 37.611 - mean_q: 37.926 Interval 1414 (706500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6615 2 episodes - episode_reward: -25.726 [-246.196, 194.745] - loss: 7.541 - mae: 37.651 - mean_q: 37.675 Interval 1415 (707000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0811 1 episodes - episode_reward: -410.290 [-410.290, -410.290] - loss: 8.583 - mae: 37.702 - mean_q: 37.659 Interval 1416 (707500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3366 1 episodes - episode_reward: 248.222 [248.222, 248.222] - loss: 7.050 - mae: 37.762 - mean_q: 37.991 Interval 1417 (708000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -0.3691 2 episodes - episode_reward: -74.185 [-273.412, 125.041] - loss: 6.963 - mae: 37.886 - mean_q: 37.604 Interval 1418 (708500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2419 1 episodes - episode_reward: 128.605 [128.605, 128.605] - loss: 7.018 - mae: 37.788 - mean_q: 38.298 Interval 1419 (709000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1958 Interval 1420 (709500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3527 3 episodes - episode_reward: -211.598 [-585.190, 190.029] - loss: 6.793 - mae: 37.821 - mean_q: 38.175 Interval 1421 (710000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0624 Interval 1422 (710500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3815 1 episodes - episode_reward: 180.419 [180.419, 180.419] - loss: 8.520 - mae: 37.781 - mean_q: 37.425 Interval 1423 (711000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4274 1 episodes - episode_reward: 229.166 [229.166, 229.166] - loss: 9.271 - mae: 38.012 - mean_q: 37.981 Interval 1424 (711500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4437 1 episodes - episode_reward: 208.500 [208.500, 208.500] - loss: 6.728 - mae: 37.819 - mean_q: 38.501 Interval 1425 (712000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2215 1 episodes - episode_reward: 196.599 [196.599, 196.599] - loss: 7.558 - mae: 37.808 - mean_q: 38.318 Interval 1426 (712500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1861 Interval 1427 (713000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3140 1 episodes - episode_reward: 194.745 [194.745, 194.745] - loss: 6.930 - mae: 38.043 - mean_q: 38.713 Interval 
1428 (713500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3309 1 episodes - episode_reward: 158.542 [158.542, 158.542] - loss: 6.557 - mae: 37.834 - mean_q: 38.490 Interval 1429 (714000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0581 Interval 1430 (714500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3618 1 episodes - episode_reward: 193.916 [193.916, 193.916] - loss: 6.330 - mae: 38.134 - mean_q: 38.954 Interval 1431 (715000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4027 4 episodes - episode_reward: -180.143 [-472.551, 226.426] - loss: 7.475 - mae: 38.095 - mean_q: 38.407 Interval 1432 (715500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0732 Interval 1433 (716000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2512 1 episodes - episode_reward: 236.145 [236.145, 236.145] - loss: 7.781 - mae: 37.793 - mean_q: 38.126 Interval 1434 (716500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3141 1 episodes - episode_reward: 124.525 [124.525, 124.525] - loss: 7.904 - mae: 37.606 - mean_q: 38.168 Interval 1435 (717000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3044 1 episodes - episode_reward: 225.390 [225.390, 225.390] - loss: 7.262 - mae: 37.837 - mean_q: 39.289 Interval 1436 (717500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4915 1 episodes - episode_reward: 175.302 [175.302, 175.302] - loss: 8.921 - mae: 37.805 - mean_q: 39.840 Interval 1437 (718000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3215 1 episodes - episode_reward: 181.989 [181.989, 181.989] - loss: 5.925 - mae: 37.677 - mean_q: 39.683 Interval 1438 (718500 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: 0.3745 1 episodes - episode_reward: 209.770 [209.770, 209.770] - loss: 7.017 - mae: 37.683 - mean_q: 39.929 Interval 1439 (719000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3752 1 episodes - episode_reward: 187.896 [187.896, 187.896] - loss: 6.758 - mae: 37.608 - mean_q: 39.828 Interval 1440 (719500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0952 Interval 1441 (720000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9233 3 episodes - episode_reward: -311.820 [-628.429, 188.299] - loss: 8.028 - mae: 37.887 - mean_q: 39.972 Interval 1442 (720500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0887 Interval 1443 (721000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9873 3 episodes - episode_reward: -163.652 [-406.527, 178.042] - loss: 7.206 - mae: 37.802 - mean_q: 39.909 Interval 1444 (721500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3485 1 episodes - episode_reward: 216.305 [216.305, 216.305] - loss: 6.225 - mae: 37.802 - mean_q: 40.610 Interval 1445 (722000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1960 1 episodes - episode_reward: -136.548 [-136.548, -136.548] - loss: 5.816 - mae: 37.699 - mean_q: 40.608 Interval 1446 (722500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2262 1 episodes - episode_reward: 184.275 [184.275, 184.275] - loss: 7.191 - mae: 37.746 - mean_q: 41.014 Interval 1447 (723000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0033 1 episodes - episode_reward: 191.982 [191.982, 191.982] - loss: 8.133 - mae: 37.735 - mean_q: 41.555 Interval 1448 (723500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2580 2 episodes - episode_reward: -40.756 
[-296.732, 215.219] - loss: 7.107 - mae: 37.582 - mean_q: 40.893 Interval 1449 (724000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1322 Interval 1450 (724500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2399 1 episodes - episode_reward: 185.524 [185.524, 185.524] - loss: 6.418 - mae: 37.470 - mean_q: 41.198 Interval 1451 (725000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8506 2 episodes - episode_reward: -195.831 [-547.839, 156.178] - loss: 7.526 - mae: 37.650 - mean_q: 40.989 Interval 1452 (725500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0231 1 episodes - episode_reward: -603.724 [-603.724, -603.724] - loss: 7.212 - mae: 37.780 - mean_q: 40.250 Interval 1453 (726000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4476 1 episodes - episode_reward: 214.225 [214.225, 214.225] - loss: 7.929 - mae: 38.096 - mean_q: 41.321 Interval 1454 (726500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0857 Interval 1455 (727000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4258 1 episodes - episode_reward: 228.967 [228.967, 228.967] - loss: 7.136 - mae: 38.276 - mean_q: 41.826 Interval 1456 (727500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3801 1 episodes - episode_reward: 252.245 [252.245, 252.245] - loss: 5.517 - mae: 38.326 - mean_q: 41.811 Interval 1457 (728000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6100 2 episodes - episode_reward: -122.767 [-428.830, 183.296] - loss: 7.637 - mae: 38.395 - mean_q: 41.736 Interval 1458 (728500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0253 Interval 1459 (729000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 
0.2602 1 episodes - episode_reward: 123.641 [123.641, 123.641] - loss: 7.795 - mae: 38.291 - mean_q: 41.994 Interval 1460 (729500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0518 Interval 1461 (730000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2101 Interval 1462 (730500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3334 1 episodes - episode_reward: 31.391 [31.391, 31.391] - loss: 6.819 - mae: 37.626 - mean_q: 40.922 Interval 1463 (731000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2024 2 episodes - episode_reward: -421.515 [-578.322, -264.708] - loss: 6.633 - mae: 37.338 - mean_q: 40.841 Interval 1464 (731500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3474 2 episodes - episode_reward: -455.721 [-569.383, -342.058] - loss: 8.660 - mae: 37.483 - mean_q: 41.633 Interval 1465 (732000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4988 2 episodes - episode_reward: -81.175 [-257.922, 95.571] - loss: 6.066 - mae: 37.557 - mean_q: 41.422 Interval 1466 (732500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2036 Interval 1467 (733000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2633 1 episodes - episode_reward: 137.467 [137.467, 137.467] - loss: 6.095 - mae: 37.615 - mean_q: 40.918 Interval 1468 (733500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0692 2 episodes - episode_reward: 15.830 [-144.544, 176.203] - loss: 7.225 - mae: 37.312 - mean_q: 40.452 Interval 1469 (734000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1107 2 episodes - episode_reward: -295.971 [-353.103, -238.839] - loss: 6.802 - mae: 37.406 - mean_q: 39.529 Interval 1470 (734500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: 0.3019 1 episodes - episode_reward: 178.375 [178.375, 178.375] - loss: 7.320 - mae: 37.018 - mean_q: 39.418 Interval 1471 (735000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1236 Interval 1472 (735500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4991 3 episodes - episode_reward: -62.392 [-233.347, 175.076] - loss: 6.815 - mae: 36.773 - mean_q: 38.701 Interval 1473 (736000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2316 Interval 1474 (736500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4300 1 episodes - episode_reward: 229.780 [229.780, 229.780] - loss: 6.884 - mae: 36.236 - mean_q: 38.150 Interval 1475 (737000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3171 1 episodes - episode_reward: 229.837 [229.837, 229.837] - loss: 9.048 - mae: 36.452 - mean_q: 38.035 Interval 1476 (737500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1892 2 episodes - episode_reward: 62.041 [-96.877, 220.959] - loss: 8.935 - mae: 36.305 - mean_q: 38.604 Interval 1477 (738000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3797 1 episodes - episode_reward: -2888.975 [-2888.975, -2888.975] - loss: 5.834 - mae: 36.233 - mean_q: 38.835 Interval 1478 (738500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.6744 1 episodes - episode_reward: 236.367 [236.367, 236.367] - loss: 5.549 - mae: 36.122 - mean_q: 38.335 Interval 1479 (739000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4254 1 episodes - episode_reward: 273.217 [273.217, 273.217] - loss: 7.405 - mae: 36.462 - mean_q: 38.544 Interval 1480 (739500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3251 1 
episodes - episode_reward: 213.502 [213.502, 213.502] - loss: 6.749 - mae: 36.425 - mean_q: 38.524 Interval 1481 (740000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2399 3 episodes - episode_reward: -51.811 [-103.066, 44.403] - loss: 7.404 - mae: 36.684 - mean_q: 38.870 Interval 1482 (740500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3732 1 episodes - episode_reward: 235.062 [235.062, 235.062] - loss: 6.771 - mae: 36.740 - mean_q: 38.637 Interval 1483 (741000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3149 2 episodes - episode_reward: 78.319 [-53.214, 209.853] - loss: 6.795 - mae: 37.201 - mean_q: 39.130 Interval 1484 (741500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1638 1 episodes - episode_reward: -166.298 [-166.298, -166.298] - loss: 7.743 - mae: 37.304 - mean_q: 38.956 Interval 1485 (742000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3027 1 episodes - episode_reward: 186.842 [186.842, 186.842] - loss: 8.883 - mae: 37.360 - mean_q: 39.394 Interval 1486 (742500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5545 2 episodes - episode_reward: -142.768 [-229.196, -56.340] - loss: 9.046 - mae: 37.740 - mean_q: 38.874 Interval 1487 (743000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7463 3 episodes - episode_reward: -62.672 [-221.542, 193.067] - loss: 8.105 - mae: 38.149 - mean_q: 38.763 Interval 1488 (743500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1317 3 episodes - episode_reward: -404.444 [-596.221, -131.021] - loss: 9.060 - mae: 38.164 - mean_q: 38.635 Interval 1489 (744000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3867 1 episodes - episode_reward: 171.966 [171.966, 171.966] - loss: 8.046 - mae: 
38.476 - mean_q: 38.105 Interval 1490 (744500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0172 Interval 1491 (745000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0304 Interval 1492 (745500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0358 Interval 1493 (746000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0331 Interval 1494 (746500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0324 Interval 1495 (747000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0389 Interval 1496 (747500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3295 1 episodes - episode_reward: 46.690 [46.690, 46.690] - loss: 10.025 - mae: 38.443 - mean_q: 37.995 Interval 1497 (748000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6843 5 episodes - episode_reward: -52.104 [-195.104, 165.457] - loss: 7.470 - mae: 38.584 - mean_q: 38.854 Interval 1498 (748500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2072 Interval 1499 (749000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0843 2 episodes - episode_reward: 13.303 [-169.433, 196.038] - loss: 6.557 - mae: 38.620 - mean_q: 38.339 Interval 1500 (749500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2109 4 episodes - episode_reward: -132.074 [-382.902, 204.275] - loss: 10.702 - mae: 39.111 - mean_q: 37.961 Interval 1501 (750000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1102 2 episodes - episode_reward: -273.257 [-486.869, -59.646] - loss: 8.986 - mae: 39.604 - mean_q: 36.944 Interval 1502 (750500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9104 3 episodes - 
episode_reward: -271.573 [-486.331, -160.892] - loss: 14.065 - mae: 39.958 - mean_q: 35.396 Interval 1503 (751000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.2666 8 episodes - episode_reward: -405.876 [-1655.063, -100.000] - loss: 9.715 - mae: 40.101 - mean_q: 35.400 Interval 1504 (751500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5412 1 episodes - episode_reward: -334.564 [-334.564, -334.564] - loss: 16.408 - mae: 40.686 - mean_q: 33.151 Interval 1505 (752000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0430 Interval 1506 (752500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3932 2 episodes - episode_reward: 193.785 [177.080, 210.491] - loss: 9.303 - mae: 40.918 - mean_q: 33.267 Interval 1507 (753000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1043 1 episodes - episode_reward: -277.405 [-277.405, -277.405] - loss: 20.646 - mae: 41.183 - mean_q: 32.590 Interval 1508 (753500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3839 1 episodes - episode_reward: 214.729 [214.729, 214.729] - loss: 14.491 - mae: 41.426 - mean_q: 33.222 Interval 1509 (754000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4734 4 episodes - episode_reward: -35.468 [-229.941, 202.310] - loss: 18.109 - mae: 41.687 - mean_q: 32.344 Interval 1510 (754500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0331 1 episodes - episode_reward: -3.343 [-3.343, -3.343] - loss: 10.176 - mae: 41.980 - mean_q: 32.024 Interval 1511 (755000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1160 2 episodes - episode_reward: 35.642 [-154.672, 225.956] - loss: 20.507 - mae: 42.224 - mean_q: 32.795 Interval 1512 (755500 steps performed) 500/500 [==============================] - 
2s 5ms/step - reward: 0.2545 Interval 1513 (756000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1825 1 episodes - episode_reward: 161.951 [161.951, 161.951] - loss: 19.336 - mae: 42.901 - mean_q: 32.857 Interval 1514 (756500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3163 1 episodes - episode_reward: 21.906 [21.906, 21.906] - loss: 18.998 - mae: 43.070 - mean_q: 32.634 Interval 1515 (757000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5236 2 episodes - episode_reward: -200.153 [-398.882, -1.424] - loss: 20.595 - mae: 42.817 - mean_q: 33.315 Interval 1516 (757500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7102 1 episodes - episode_reward: -962.862 [-962.862, -962.862] - loss: 14.906 - mae: 42.674 - mean_q: 33.222 Interval 1517 (758000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9895 2 episodes - episode_reward: -275.653 [-522.063, -29.244] - loss: 14.808 - mae: 43.386 - mean_q: 33.227 Interval 1518 (758500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4578 1 episodes - episode_reward: 245.152 [245.152, 245.152] - loss: 17.254 - mae: 43.403 - mean_q: 33.787 Interval 1519 (759000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0153 2 episodes - episode_reward: 29.444 [-195.148, 254.036] - loss: 16.542 - mae: 43.886 - mean_q: 34.091 Interval 1520 (759500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2992 1 episodes - episode_reward: 223.644 [223.644, 223.644] - loss: 19.059 - mae: 44.144 - mean_q: 33.730 Interval 1521 (760000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0948 2 episodes - episode_reward: -73.580 [-82.782, -64.377] - loss: 15.099 - mae: 44.706 - mean_q: 33.238 Interval 1522 (760500 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -0.7232 3 episodes - episode_reward: -113.774 [-165.854, -10.220] - loss: 12.368 - mae: 43.841 - mean_q: 34.946 Interval 1523 (761000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0399 Interval 1524 (761500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9063 2 episodes - episode_reward: -166.925 [-513.828, 179.978] - loss: 11.528 - mae: 44.797 - mean_q: 35.476 Interval 1525 (762000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2844 1 episodes - episode_reward: -254.237 [-254.237, -254.237] - loss: 11.367 - mae: 46.215 - mean_q: 34.202 Interval 1526 (762500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0427 Interval 1527 (763000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5139 5 episodes - episode_reward: -141.082 [-277.892, 207.659] - loss: 13.212 - mae: 46.892 - mean_q: 34.972 Interval 1528 (763500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0212 Interval 1529 (764000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2078 1 episodes - episode_reward: -101.956 [-101.956, -101.956] - loss: 12.923 - mae: 47.876 - mean_q: 35.172 Interval 1530 (764500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0719 Interval 1531 (765000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3654 1 episodes - episode_reward: 154.618 [154.618, 154.618] - loss: 13.153 - mae: 47.590 - mean_q: 37.085 Interval 1532 (765500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0482 1 episodes - episode_reward: 184.139 [184.139, 184.139] - loss: 13.874 - mae: 47.867 - mean_q: 37.032 Interval 1533 (766000 steps performed) 500/500 [==============================] - 2s 
5ms/step - reward: -0.1701 1 episodes - episode_reward: -179.833 [-179.833, -179.833] - loss: 15.417 - mae: 48.085 - mean_q: 38.125 Interval 1534 (766500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0465 Interval 1535 (767000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2459 1 episodes - episode_reward: 146.994 [146.994, 146.994] - loss: 20.963 - mae: 49.053 - mean_q: 37.247 Interval 1536 (767500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1197 3 episodes - episode_reward: -346.852 [-598.870, -202.448] - loss: 12.591 - mae: 49.549 - mean_q: 37.106 Interval 1537 (768000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0012 Interval 1538 (768500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3572 1 episodes - episode_reward: 135.783 [135.783, 135.783] - loss: 26.367 - mae: 49.613 - mean_q: 37.057 Interval 1539 (769000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0224 Interval 1540 (769500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4314 3 episodes - episode_reward: -68.148 [-196.379, 157.121] - loss: 14.102 - mae: 49.998 - mean_q: 37.148 Interval 1541 (770000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0425 2 episodes - episode_reward: -4.161 [-168.162, 159.839] - loss: 11.387 - mae: 50.046 - mean_q: 38.029 Interval 1542 (770500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0397 Interval 1543 (771000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2729 Interval 1544 (771500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0253 2 episodes - episode_reward: -353.384 [-396.658, -310.109] - loss: 16.475 - mae: 49.116 - mean_q: 39.526 Interval 1545 
(772000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1993 Interval 1546 (772500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0220 Interval 1547 (773000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0032 2 episodes - episode_reward: -57.301 [-172.935, 58.334] - loss: 14.445 - mae: 50.721 - mean_q: 39.213 Interval 1548 (773500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2995 2 episodes - episode_reward: -343.042 [-454.627, -231.457] - loss: 12.485 - mae: 50.583 - mean_q: 40.222 Interval 1549 (774000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4393 1 episodes - episode_reward: 245.524 [245.524, 245.524] - loss: 14.962 - mae: 50.678 - mean_q: 40.428 Interval 1550 (774500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5749 4 episodes - episode_reward: -70.011 [-180.858, 186.293] - loss: 12.534 - mae: 50.184 - mean_q: 41.616 Interval 1551 (775000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9584 8 episodes - episode_reward: -183.138 [-230.925, -77.018] - loss: 13.574 - mae: 50.851 - mean_q: 40.758 Interval 1552 (775500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0380 Interval 1553 (776000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3134 2 episodes - episode_reward: -89.379 [-324.929, 146.170] - loss: 16.980 - mae: 50.406 - mean_q: 42.158 Interval 1554 (776500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3425 1 episodes - episode_reward: 179.649 [179.649, 179.649] - loss: 14.048 - mae: 50.513 - mean_q: 42.539 Interval 1555 (777000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5632 6 episodes - episode_reward: -119.326 [-241.066, 185.725] - 
loss: 11.589 - mae: 50.364 - mean_q: 42.620 Interval 1556 (777500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2454 1 episodes - episode_reward: -187.098 [-187.098, -187.098] - loss: 14.353 - mae: 51.053 - mean_q: 42.240 Interval 1557 (778000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8780 4 episodes - episode_reward: -98.436 [-327.660, 193.644] - loss: 11.936 - mae: 50.837 - mean_q: 40.796 Interval 1558 (778500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2904 1 episodes - episode_reward: 130.709 [130.709, 130.709] - loss: 13.059 - mae: 50.718 - mean_q: 41.802 Interval 1559 (779000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1021 Interval 1560 (779500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6359 1 episodes - episode_reward: -462.732 [-462.732, -462.732] - loss: 14.442 - mae: 50.899 - mean_q: 42.132 Interval 1561 (780000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3393 1 episodes - episode_reward: 267.645 [267.645, 267.645] - loss: 12.490 - mae: 50.714 - mean_q: 43.453 Interval 1562 (780500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0362 Interval 1563 (781000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1972 1 episodes - episode_reward: 154.547 [154.547, 154.547] - loss: 11.892 - mae: 50.266 - mean_q: 44.561 Interval 1564 (781500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3612 1 episodes - episode_reward: 149.842 [149.842, 149.842] - loss: 11.925 - mae: 50.079 - mean_q: 44.026 Interval 1565 (782000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1156 3 episodes - episode_reward: -29.588 [-222.374, 233.609] - loss: 12.789 - mae: 50.103 - mean_q: 45.204 Interval 1566 
(782500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1395 Interval 1567 (783000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3046 1 episodes - episode_reward: 215.990 [215.990, 215.990] - loss: 10.879 - mae: 49.874 - mean_q: 46.034 Interval 1568 (783500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3153 3 episodes - episode_reward: -57.660 [-253.003, 199.097] - loss: 10.157 - mae: 49.721 - mean_q: 45.743 Interval 1569 (784000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3493 1 episodes - episode_reward: 243.764 [243.764, 243.764] - loss: 12.929 - mae: 49.984 - mean_q: 46.883 Interval 1570 (784500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2388 2 episodes - episode_reward: -60.384 [-100.000, -20.767] - loss: 8.738 - mae: 49.713 - mean_q: 47.461 Interval 1571 (785000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2059 Interval 1572 (785500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2335 1 episodes - episode_reward: 210.959 [210.959, 210.959] - loss: 9.867 - mae: 50.623 - mean_q: 46.005 Interval 1573 (786000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4200 1 episodes - episode_reward: 137.979 [137.979, 137.979] - loss: 10.565 - mae: 49.951 - mean_q: 47.179 Interval 1574 (786500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0666 Interval 1575 (787000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2259 Interval 1576 (787500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4114 1 episodes - episode_reward: 136.863 [136.863, 136.863] - loss: 14.784 - mae: 49.735 - mean_q: 47.513 Interval 1577 (788000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: 0.0495 2 episodes - episode_reward: 49.530 [-116.158, 215.218] - loss: 14.458 - mae: 49.559 - mean_q: 48.506 Interval 1578 (788500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1093 Interval 1579 (789000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0709 2 episodes - episode_reward: -20.379 [-123.777, 83.019] - loss: 11.614 - mae: 48.963 - mean_q: 47.738 Interval 1580 (789500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2739 1 episodes - episode_reward: 158.011 [158.011, 158.011] - loss: 11.042 - mae: 48.256 - mean_q: 48.800 Interval 1581 (790000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2313 Interval 1582 (790500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4050 1 episodes - episode_reward: 222.782 [222.782, 222.782] - loss: 10.149 - mae: 49.109 - mean_q: 48.416 Interval 1583 (791000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5997 4 episodes - episode_reward: -68.582 [-179.002, 199.472] - loss: 10.210 - mae: 49.278 - mean_q: 48.936 Interval 1584 (791500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0935 2 episodes - episode_reward: 37.081 [-157.033, 231.195] - loss: 9.613 - mae: 49.284 - mean_q: 49.434 Interval 1585 (792000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1077 Interval 1586 (792500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1608 3 episodes - episode_reward: -0.101 [-100.000, 199.698] - loss: 9.687 - mae: 48.852 - mean_q: 48.908 Interval 1587 (793000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0657 2 episodes - episode_reward: -36.810 [-207.609, 133.989] - loss: 10.120 - mae: 49.222 - mean_q: 49.268 
Interval 1588 (793500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1105 Interval 1589 (794000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1691 1 episodes - episode_reward: 96.413 [96.413, 96.413] - loss: 11.252 - mae: 48.890 - mean_q: 48.803 Interval 1590 (794500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1431 3 episodes - episode_reward: -25.087 [-129.586, 175.425] - loss: 10.618 - mae: 49.184 - mean_q: 49.404 Interval 1591 (795000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1312 1 episodes - episode_reward: -158.494 [-158.494, -158.494] - loss: 12.180 - mae: 49.459 - mean_q: 48.533 Interval 1592 (795500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4288 1 episodes - episode_reward: 233.783 [233.783, 233.783] - loss: 15.235 - mae: 49.615 - mean_q: 49.102 Interval 1593 (796000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3958 5 episodes - episode_reward: -243.341 [-396.317, -23.764] - loss: 11.487 - mae: 50.045 - mean_q: 49.067 Interval 1594 (796500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3257 1 episodes - episode_reward: 222.443 [222.443, 222.443] - loss: 11.596 - mae: 50.367 - mean_q: 49.081 Interval 1595 (797000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0245 2 episodes - episode_reward: 5.522 [-188.782, 199.826] - loss: 13.158 - mae: 50.682 - mean_q: 48.714 Interval 1596 (797500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0383 Interval 1597 (798000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.6860 2 episodes - episode_reward: 190.352 [151.495, 229.209] - loss: 13.706 - mae: 50.588 - mean_q: 48.373 Interval 1598 (798500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: 0.2044 Interval 1599 (799000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1930 1 episodes - episode_reward: 213.965 [213.965, 213.965] - loss: 12.945 - mae: 50.312 - mean_q: 47.715 Interval 1600 (799500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0582 2 episodes - episode_reward: 8.243 [-107.869, 124.354] - loss: 11.533 - mae: 50.391 - mean_q: 48.391 Interval 1601 (800000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5579 1 episodes - episode_reward: -256.374 [-256.374, -256.374] - loss: 12.563 - mae: 50.067 - mean_q: 47.865 Interval 1602 (800500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0304 Interval 1603 (801000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0882 Interval 1604 (801500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5459 2 episodes - episode_reward: -116.125 [-334.722, 102.473] - loss: 10.235 - mae: 48.827 - mean_q: 49.850 Interval 1605 (802000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2538 2 episodes - episode_reward: -74.071 [-123.046, -25.096] - loss: 11.903 - mae: 48.893 - mean_q: 49.387 Interval 1606 (802500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1600 Interval 1607 (803000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6434 4 episodes - episode_reward: -202.362 [-408.104, 177.192] - loss: 10.646 - mae: 47.850 - mean_q: 48.391 Interval 1608 (803500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3662 2 episodes - episode_reward: -58.987 [-318.159, 200.185] - loss: 8.619 - mae: 47.404 - mean_q: 48.640 Interval 1609 (804000 steps performed) 500/500 [==============================] - 2s 4ms/step - 
reward: 0.4203 1 episodes - episode_reward: 176.785 [176.785, 176.785] - loss: 12.300 - mae: 47.640 - mean_q: 47.679 Interval 1610 (804500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4092 1 episodes - episode_reward: 197.313 [197.313, 197.313] - loss: 12.473 - mae: 47.060 - mean_q: 47.547 Interval 1611 (805000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1262 Interval 1612 (805500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2120 4 episodes - episode_reward: -13.631 [-140.785, 203.753] - loss: 12.460 - mae: 46.138 - mean_q: 46.132 Interval 1613 (806000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2582 1 episodes - episode_reward: 190.362 [190.362, 190.362] - loss: 11.060 - mae: 46.187 - mean_q: 46.793 Interval 1614 (806500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5446 5 episodes - episode_reward: -172.862 [-216.500, -130.920] - loss: 12.300 - mae: 46.105 - mean_q: 45.736 Interval 1615 (807000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3618 1 episodes - episode_reward: 251.012 [251.012, 251.012] - loss: 9.610 - mae: 46.016 - mean_q: 45.200 Interval 1616 (807500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1589 3 episodes - episode_reward: -35.098 [-183.798, 191.040] - loss: 10.159 - mae: 45.737 - mean_q: 45.888 Interval 1617 (808000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0141 Interval 1618 (808500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3637 1 episodes - episode_reward: 128.848 [128.848, 128.848] - loss: 10.093 - mae: 45.345 - mean_q: 45.235 Interval 1619 (809000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3682 1 episodes - episode_reward: 165.944 [165.944, 
165.944] - loss: 12.787 - mae: 45.161 - mean_q: 45.965 Interval 1620 (809500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0759 Interval 1621 (810000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1783 2 episodes - episode_reward: 54.271 [-132.795, 241.336] - loss: 7.351 - mae: 44.988 - mean_q: 45.767 Interval 1622 (810500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0159 Interval 1623 (811000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8178 2 episodes - episode_reward: -256.958 [-602.203, 88.287] - loss: 9.312 - mae: 44.816 - mean_q: 45.404 Interval 1624 (811500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2927 1 episodes - episode_reward: 224.943 [224.943, 224.943] - loss: 9.560 - mae: 44.192 - mean_q: 45.351 Interval 1625 (812000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4556 3 episodes - episode_reward: -561.413 [-687.461, -414.801] - loss: 12.370 - mae: 44.121 - mean_q: 45.379 Interval 1626 (812500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2633 4 episodes - episode_reward: -297.127 [-610.207, -104.058] - loss: 12.905 - mae: 44.246 - mean_q: 43.893 Interval 1627 (813000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1078 Interval 1628 (813500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0201 2 episodes - episode_reward: -38.790 [-100.000, 22.421] - loss: 10.175 - mae: 43.650 - mean_q: 43.980 Interval 1629 (814000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0956 Interval 1630 (814500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3618 3 episodes - episode_reward: -46.066 [-583.845, 229.982] - loss: 10.527 - mae: 43.622 - mean_q: 
44.161 Interval 1631 (815000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7083 5 episodes - episode_reward: -173.077 [-326.187, -21.906] - loss: 11.867 - mae: 43.375 - mean_q: 43.766 Interval 1632 (815500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4290 3 episodes - episode_reward: -172.996 [-461.843, 213.574] - loss: 10.044 - mae: 43.190 - mean_q: 42.478 Interval 1633 (816000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1244 3 episodes - episode_reward: -74.432 [-281.906, 174.043] - loss: 11.384 - mae: 42.806 - mean_q: 42.018 Interval 1634 (816500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6571 2 episodes - episode_reward: -213.737 [-221.355, -206.119] - loss: 11.177 - mae: 42.533 - mean_q: 40.535 Interval 1635 (817000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.7457 2 episodes - episode_reward: 215.552 [199.849, 231.254] - loss: 9.514 - mae: 41.748 - mean_q: 40.954 Interval 1636 (817500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3105 1 episodes - episode_reward: 109.240 [109.240, 109.240] - loss: 11.656 - mae: 41.586 - mean_q: 40.933 Interval 1637 (818000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3227 1 episodes - episode_reward: 245.378 [245.378, 245.378] - loss: 10.147 - mae: 41.232 - mean_q: 40.795 Interval 1638 (818500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3672 5 episodes - episode_reward: -239.326 [-379.050, -61.442] - loss: 8.793 - mae: 40.843 - mean_q: 40.044 Interval 1639 (819000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4880 4 episodes - episode_reward: -159.939 [-242.268, -100.000] - loss: 9.774 - mae: 40.722 - mean_q: 39.297 Interval 1640 (819500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -0.0694 1 episodes - episode_reward: -241.000 [-241.000, -241.000] - loss: 10.110 - mae: 40.178 - mean_q: 38.274 Interval 1641 (820000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5766 3 episodes - episode_reward: -398.696 [-1172.555, 205.315] - loss: 11.578 - mae: 39.961 - mean_q: 37.918 Interval 1642 (820500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1753 2 episodes - episode_reward: -289.133 [-485.660, -92.606] - loss: 10.003 - mae: 39.696 - mean_q: 37.070 Interval 1643 (821000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1111 1 episodes - episode_reward: 193.856 [193.856, 193.856] - loss: 10.959 - mae: 39.160 - mean_q: 36.802 Interval 1644 (821500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4505 3 episodes - episode_reward: -287.367 [-411.850, -159.026] - loss: 9.486 - mae: 39.343 - mean_q: 35.851 Interval 1645 (822000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6971 3 episodes - episode_reward: -276.845 [-456.024, -139.806] - loss: 9.374 - mae: 39.322 - mean_q: 35.137 Interval 1646 (822500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0160 1 episodes - episode_reward: -52.613 [-52.613, -52.613] - loss: 10.882 - mae: 39.029 - mean_q: 35.005 Interval 1647 (823000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0029 Interval 1648 (823500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0313 Interval 1649 (824000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8840 3 episodes - episode_reward: -157.326 [-415.597, 105.389] - loss: 12.770 - mae: 38.311 - mean_q: 34.503 Interval 1650 (824500 steps performed) 500/500 [==============================] - 2s 5ms/step - 
reward: 0.1067 Interval 1651 (825000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4489 1 episodes - episode_reward: 203.667 [203.667, 203.667] - loss: 10.555 - mae: 37.383 - mean_q: 34.388 Interval 1652 (825500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3052 1 episodes - episode_reward: 202.210 [202.210, 202.210] - loss: 12.362 - mae: 37.186 - mean_q: 34.042 Interval 1653 (826000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0016 2 episodes - episode_reward: 0.170 [-190.934, 191.273] - loss: 11.590 - mae: 36.685 - mean_q: 33.742 Interval 1654 (826500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0903 2 episodes - episode_reward: 30.011 [-151.526, 211.548] - loss: 13.040 - mae: 36.777 - mean_q: 33.900 Interval 1655 (827000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1812 Interval 1656 (827500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5367 2 episodes - episode_reward: -223.250 [-258.884, -187.615] - loss: 11.298 - mae: 36.346 - mean_q: 34.816 Interval 1657 (828000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3849 1 episodes - episode_reward: 194.624 [194.624, 194.624] - loss: 10.573 - mae: 36.370 - mean_q: 34.313 Interval 1658 (828500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1321 1 episodes - episode_reward: -49.420 [-49.420, -49.420] - loss: 12.998 - mae: 36.608 - mean_q: 35.037 Interval 1659 (829000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1259 Interval 1660 (829500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0260 Interval 1661 (830000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6293 3 episodes - episode_reward: -78.349 
[-215.888, 80.592] - loss: 10.250 - mae: 36.520 - mean_q: 34.981 Interval 1662 (830500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1483 3 episodes - episode_reward: -55.295 [-196.367, 204.685] - loss: 10.570 - mae: 36.696 - mean_q: 35.296 Interval 1663 (831000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8298 3 episodes - episode_reward: -161.997 [-254.672, -21.417] - loss: 11.714 - mae: 36.520 - mean_q: 34.181 Interval 1664 (831500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0956 2 episodes - episode_reward: 23.802 [-170.168, 217.772] - loss: 10.818 - mae: 36.406 - mean_q: 34.974 Interval 1665 (832000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3182 Interval 1666 (832500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1156 Interval 1667 (833000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0410 2 episodes - episode_reward: -59.837 [-287.347, 167.674] - loss: 11.965 - mae: 36.453 - mean_q: 33.758 Interval 1668 (833500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1786 5 episodes - episode_reward: -84.626 [-211.091, 265.803] - loss: 10.006 - mae: 36.620 - mean_q: 34.726 Interval 1669 (834000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1165 Interval 1670 (834500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0901 1 episodes - episode_reward: -145.533 [-145.533, -145.533] - loss: 11.076 - mae: 36.566 - mean_q: 33.781 Interval 1671 (835000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3786 1 episodes - episode_reward: 174.316 [174.316, 174.316] - loss: 11.481 - mae: 36.562 - mean_q: 34.145 Interval 1672 (835500 steps performed) 500/500 [==============================] - 2s 4ms/step 
- reward: -1.7065 3 episodes - episode_reward: -236.169 [-276.367, -185.785] - loss: 13.947 - mae: 36.444 - mean_q: 34.233 Interval 1673 (836000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3705 3 episodes - episode_reward: -100.844 [-170.927, 21.749] - loss: 9.519 - mae: 36.946 - mean_q: 33.422 Interval 1674 (836500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1180 Interval 1675 (837000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8801 4 episodes - episode_reward: -87.909 [-206.284, 208.999] - loss: 11.286 - mae: 37.032 - mean_q: 32.433 Interval 1676 (837500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.5345 1 episodes - episode_reward: 238.205 [238.205, 238.205] - loss: 12.533 - mae: 37.199 - mean_q: 32.384 Interval 1677 (838000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2432 Interval 1678 (838500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4561 3 episodes - episode_reward: -8.824 [-178.823, 252.350] - loss: 10.200 - mae: 37.650 - mean_q: 33.372 Interval 1679 (839000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5152 2 episodes - episode_reward: -141.980 [-150.527, -133.433] - loss: 11.460 - mae: 37.877 - mean_q: 33.620 Interval 1680 (839500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.4001 1 episodes - episode_reward: 50.658 [50.658, 50.658] - loss: 12.169 - mae: 37.709 - mean_q: 33.597 Interval 1681 (840000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9393 5 episodes - episode_reward: -86.394 [-244.180, 259.845] - loss: 12.504 - mae: 38.069 - mean_q: 33.877 Interval 1682 (840500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0743 2 episodes - episode_reward: -17.332 
[-283.968, 249.305] - loss: 9.709 - mae: 38.156 - mean_q: 33.831 Interval 1683 (841000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2085 3 episodes - episode_reward: -1.253 [-144.932, 260.086] - loss: 11.490 - mae: 38.349 - mean_q: 34.009 Interval 1684 (841500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3242 1 episodes - episode_reward: 199.602 [199.602, 199.602] - loss: 10.390 - mae: 38.479 - mean_q: 34.354 Interval 1685 (842000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0037 Interval 1686 (842500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.7305 2 episodes - episode_reward: 182.400 [120.539, 244.261] - loss: 9.890 - mae: 39.348 - mean_q: 35.283 Interval 1687 (843000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1544 Interval 1688 (843500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2533 Interval 1689 (844000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1005 Interval 1690 (844500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1729 1 episodes - episode_reward: -148.169 [-148.169, -148.169] - loss: 11.231 - mae: 39.186 - mean_q: 34.571 Interval 1691 (845000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1581 Interval 1692 (845500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0445 2 episodes - episode_reward: 21.597 [-157.326, 200.520] - loss: 14.280 - mae: 39.162 - mean_q: 35.823 Interval 1693 (846000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1739 5 episodes - episode_reward: -76.814 [-182.512, 154.702] - loss: 10.822 - mae: 38.955 - mean_q: 34.965 Interval 1694 (846500 steps performed) 500/500 [==============================] - 2s 
4ms/step - reward: -1.1142 2 episodes - episode_reward: -335.309 [-427.414, -243.203] - loss: 11.489 - mae: 38.732 - mean_q: 34.327 Interval 1695 (847000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1707 Interval 1696 (847500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0272 Interval 1697 (848000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2470 1 episodes - episode_reward: -28.876 [-28.876, -28.876] - loss: 11.556 - mae: 38.341 - mean_q: 33.753 Interval 1698 (848500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6055 4 episodes - episode_reward: -93.009 [-206.631, 124.778] - loss: 10.687 - mae: 38.474 - mean_q: 33.551 Interval 1699 (849000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1421 5 episodes - episode_reward: -95.099 [-221.964, 225.020] - loss: 10.958 - mae: 38.668 - mean_q: 33.659 Interval 1700 (849500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4663 3 episodes - episode_reward: -53.153 [-190.030, 210.647] - loss: 14.477 - mae: 39.089 - mean_q: 33.179 Interval 1701 (850000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1882 1 episodes - episode_reward: -211.509 [-211.509, -211.509] - loss: 10.356 - mae: 39.073 - mean_q: 33.148 Interval 1702 (850500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1993 1 episodes - episode_reward: 136.388 [136.388, 136.388] - loss: 9.076 - mae: 39.115 - mean_q: 33.166 Interval 1703 (851000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8849 2 episodes - episode_reward: -231.828 [-255.442, -208.214] - loss: 14.617 - mae: 39.431 - mean_q: 33.490 Interval 1704 (851500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1110 Interval 1705 (852000 steps 
performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1362 Interval 1706 (852500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2130 1 episodes - episode_reward: 22.527 [22.527, 22.527] - loss: 12.270 - mae: 39.156 - mean_q: 33.015 Interval 1707 (853000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4846 4 episodes - episode_reward: -187.485 [-314.410, -96.032] - loss: 10.872 - mae: 38.680 - mean_q: 32.721 Interval 1708 (853500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2970 1 episodes - episode_reward: 131.464 [131.464, 131.464] - loss: 10.215 - mae: 38.359 - mean_q: 32.315 Interval 1709 (854000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1903 4 episodes - episode_reward: -149.578 [-215.015, -83.980] - loss: 12.346 - mae: 37.864 - mean_q: 31.222 Interval 1710 (854500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1047 Interval 1711 (855000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0270 Interval 1712 (855500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0290 Interval 1713 (856000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3629 1 episodes - episode_reward: 135.327 [135.327, 135.327] - loss: 11.181 - mae: 37.161 - mean_q: 30.012 Interval 1714 (856500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3400 1 episodes - episode_reward: 217.616 [217.616, 217.616] - loss: 14.480 - mae: 36.701 - mean_q: 31.030 Interval 1715 (857000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3423 1 episodes - episode_reward: 183.730 [183.730, 183.730] - loss: 10.605 - mae: 36.334 - mean_q: 30.004 Interval 1716 (857500 steps performed) 500/500 [==============================] - 2s 
5ms/step - reward: 0.2812 2 episodes - episode_reward: 77.484 [-100.000, 254.967] - loss: 14.855 - mae: 36.353 - mean_q: 30.294 Interval 1717 (858000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3952 4 episodes - episode_reward: -172.703 [-209.213, -148.869] - loss: 9.944 - mae: 36.282 - mean_q: 30.212 Interval 1718 (858500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1595 Interval 1719 (859000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1968 1 episodes - episode_reward: 183.606 [183.606, 183.606] - loss: 12.582 - mae: 35.685 - mean_q: 29.813 Interval 1720 (859500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5735 2 episodes - episode_reward: -153.434 [-180.296, -126.573] - loss: 13.406 - mae: 35.650 - mean_q: 28.845 Interval 1721 (860000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1015 Interval 1722 (860500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1099 4 episodes - episode_reward: 6.176 [-181.685, 207.252] - loss: 10.667 - mae: 36.007 - mean_q: 28.749 Interval 1723 (861000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6110 5 episodes - episode_reward: -179.049 [-242.626, -100.000] - loss: 10.641 - mae: 35.994 - mean_q: 28.502 Interval 1724 (861500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1303 2 episodes - episode_reward: 53.571 [-137.516, 244.657] - loss: 11.353 - mae: 35.726 - mean_q: 28.586 Interval 1725 (862000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1460 1 episodes - episode_reward: -222.356 [-222.356, -222.356] - loss: 9.065 - mae: 35.722 - mean_q: 28.215 Interval 1726 (862500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5521 3 episodes - episode_reward: 
-96.739 [-222.730, 153.102] - loss: 10.126 - mae: 35.428 - mean_q: 27.718 Interval 1727 (863000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8031 3 episodes - episode_reward: -48.713 [-164.482, 24.102] - loss: 10.308 - mae: 35.451 - mean_q: 27.402 Interval 1728 (863500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7854 4 episodes - episode_reward: -244.968 [-279.026, -224.963] - loss: 13.693 - mae: 35.858 - mean_q: 27.849 Interval 1729 (864000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0164 2 episodes - episode_reward: 9.222 [-207.562, 226.007] - loss: 10.163 - mae: 36.084 - mean_q: 27.452 Interval 1730 (864500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3958 6 episodes - episode_reward: -176.810 [-239.426, -100.000] - loss: 10.660 - mae: 35.915 - mean_q: 27.166 Interval 1731 (865000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8942 4 episodes - episode_reward: -178.525 [-294.084, -20.769] - loss: 12.043 - mae: 35.970 - mean_q: 25.720 Interval 1732 (865500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5766 2 episodes - episode_reward: -140.240 [-184.583, -95.898] - loss: 10.093 - mae: 36.266 - mean_q: 26.925 Interval 1733 (866000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6954 3 episodes - episode_reward: -112.585 [-234.710, 17.994] - loss: 11.885 - mae: 36.391 - mean_q: 26.524 Interval 1734 (866500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5307 2 episodes - episode_reward: -175.841 [-178.196, -173.486] - loss: 15.280 - mae: 36.581 - mean_q: 26.610 Interval 1735 (867000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9905 5 episodes - episode_reward: -78.967 [-288.718, 194.567] - loss: 12.636 - mae: 36.505 - 
mean_q: 25.610 Interval 1736 (867500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4856 2 episodes - episode_reward: -148.440 [-240.952, -55.927] - loss: 11.512 - mae: 37.088 - mean_q: 25.614 Interval 1737 (868000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8933 2 episodes - episode_reward: -118.592 [-128.163, -109.022] - loss: 11.758 - mae: 36.844 - mean_q: 25.934 Interval 1738 (868500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1949 4 episodes - episode_reward: -177.236 [-290.843, -96.069] - loss: 12.774 - mae: 36.599 - mean_q: 25.094 Interval 1739 (869000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1744 Interval 1740 (869500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0447 Interval 1741 (870000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0068 1 episodes - episode_reward: 101.323 [101.323, 101.323] - loss: 14.048 - mae: 36.597 - mean_q: 25.091 Interval 1742 (870500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8507 4 episodes - episode_reward: -129.047 [-263.330, -50.515] - loss: 14.583 - mae: 36.777 - mean_q: 25.201 Interval 1743 (871000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1570 1 episodes - episode_reward: -61.688 [-61.688, -61.688] - loss: 13.291 - mae: 36.857 - mean_q: 25.515 Interval 1744 (871500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2848 Interval 1745 (872000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1203 Interval 1746 (872500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0357 Interval 1747 (873000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0661 Interval 1748 
(873500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3180 3 episodes - episode_reward: -170.779 [-213.402, -127.698] - loss: 12.795 - mae: 37.111 - mean_q: 27.477 Interval 1749 (874000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0466 1 episodes - episode_reward: -2.150 [-2.150, -2.150] - loss: 13.454 - mae: 37.755 - mean_q: 26.850 Interval 1750 (874500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0782 Interval 1751 (875000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1473 1 episodes - episode_reward: 161.892 [161.892, 161.892] - loss: 12.052 - mae: 38.787 - mean_q: 28.056 Interval 1752 (875500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0698 Interval 1753 (876000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0471 Interval 1754 (876500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.2779 1 episodes - episode_reward: 19.303 [19.303, 19.303] - loss: 15.068 - mae: 38.558 - mean_q: 28.265 Interval 1755 (877000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2661 Interval 1756 (877500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1731 Interval 1757 (878000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3508 1 episodes - episode_reward: 80.098 [80.098, 80.098] - loss: 14.918 - mae: 39.194 - mean_q: 29.714 Interval 1758 (878500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0823 Interval 1759 (879000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7364 3 episodes - episode_reward: -95.531 [-365.965, 223.567] - loss: 10.761 - mae: 39.980 - mean_q: 29.703 Interval 1760 (879500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: 0.3953 1 episodes - episode_reward: 224.958 [224.958, 224.958] - loss: 14.437 - mae: 40.047 - mean_q: 29.015 Interval 1761 (880000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7821 2 episodes - episode_reward: -182.887 [-193.628, -172.146] - loss: 13.310 - mae: 39.994 - mean_q: 31.046 Interval 1762 (880500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2986 4 episodes - episode_reward: -290.618 [-624.418, -101.988] - loss: 11.310 - mae: 40.481 - mean_q: 31.000 Interval 1763 (881000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4758 5 episodes - episode_reward: -219.651 [-395.951, -100.000] - loss: 13.096 - mae: 40.468 - mean_q: 30.804 Interval 1764 (881500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3234 2 episodes - episode_reward: -120.924 [-214.268, -27.581] - loss: 15.449 - mae: 40.436 - mean_q: 30.873 Interval 1765 (882000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5518 5 episodes - episode_reward: -184.126 [-237.232, -130.920] - loss: 10.099 - mae: 40.593 - mean_q: 30.531 Interval 1766 (882500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0206 1 episodes - episode_reward: 16.262 [16.262, 16.262] - loss: 12.783 - mae: 40.619 - mean_q: 30.143 Interval 1767 (883000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2930 1 episodes - episode_reward: 217.632 [217.632, 217.632] - loss: 13.295 - mae: 40.808 - mean_q: 30.371 Interval 1768 (883500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3444 3 episodes - episode_reward: -56.968 [-124.426, -3.837] - loss: 12.536 - mae: 41.146 - mean_q: 30.112 Interval 1769 (884000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.5203 1 
episodes - episode_reward: 246.725 [246.725, 246.725] - loss: 12.480 - mae: 41.001 - mean_q: 30.982 Interval 1770 (884500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2487 2 episodes - episode_reward: -48.015 [-101.510, 5.480] - loss: 10.906 - mae: 41.296 - mean_q: 31.874 Interval 1771 (885000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7370 2 episodes - episode_reward: -439.942 [-673.624, -206.260] - loss: 13.623 - mae: 41.675 - mean_q: 31.107 Interval 1772 (885500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2360 Interval 1773 (886000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0495 Interval 1774 (886500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6317 4 episodes - episode_reward: 3.940 [-165.160, 253.078] - loss: 13.981 - mae: 41.967 - mean_q: 32.578 Interval 1775 (887000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3005 3 episodes - episode_reward: -274.615 [-387.889, -122.142] - loss: 16.736 - mae: 41.645 - mean_q: 33.507 Interval 1776 (887500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0285 Interval 1777 (888000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3482 1 episodes - episode_reward: 151.035 [151.035, 151.035] - loss: 12.875 - mae: 41.835 - mean_q: 33.058 Interval 1778 (888500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0233 4 episodes - episode_reward: -244.745 [-600.429, -35.913] - loss: 14.951 - mae: 42.212 - mean_q: 33.810 Interval 1779 (889000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4265 4 episodes - episode_reward: -281.571 [-508.457, -68.537] - loss: 13.644 - mae: 42.842 - mean_q: 34.102 Interval 1780 (889500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -0.5072 1 episodes - episode_reward: -412.039 [-412.039, -412.039] - loss: 15.532 - mae: 43.356 - mean_q: 34.111 Interval 1781 (890000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8343 1 episodes - episode_reward: 135.095 [135.095, 135.095] - loss: 13.905 - mae: 42.997 - mean_q: 33.572 Interval 1782 (890500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7056 2 episodes - episode_reward: -658.280 [-1110.247, -206.313] - loss: 17.387 - mae: 43.241 - mean_q: 33.044 Interval 1783 (891000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6199 4 episodes - episode_reward: -232.259 [-400.450, -82.433] - loss: 15.659 - mae: 42.722 - mean_q: 32.226 Interval 1784 (891500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1313 1 episodes - episode_reward: 228.010 [228.010, 228.010] - loss: 17.040 - mae: 42.399 - mean_q: 32.499 Interval 1785 (892000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2704 Interval 1786 (892500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0242 Interval 1787 (893000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1145 3 episodes - episode_reward: -271.580 [-612.844, -49.665] - loss: 13.091 - mae: 42.380 - mean_q: 32.110 Interval 1788 (893500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0516 Interval 1789 (894000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8377 2 episodes - episode_reward: -143.055 [-462.479, 176.370] - loss: 14.813 - mae: 42.508 - mean_q: 31.183 Interval 1790 (894500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1323 1 episodes - episode_reward: -96.686 [-96.686, -96.686] - loss: 12.887 - mae: 42.603 - 
mean_q: 30.588 Interval 1791 (895000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.5020 1 episodes - episode_reward: 214.485 [214.485, 214.485] - loss: 15.363 - mae: 42.587 - mean_q: 31.697 Interval 1792 (895500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3007 Interval 1793 (896000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5794 2 episodes - episode_reward: -45.323 [-343.235, 252.588] - loss: 25.014 - mae: 44.153 - mean_q: 33.271 Interval 1794 (896500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2346 Interval 1795 (897000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6014 Interval 1796 (897500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6954 1 episodes - episode_reward: -869.714 [-869.714, -869.714] - loss: 41.241 - mae: 47.434 - mean_q: 39.020 Interval 1797 (898000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0199 Interval 1798 (898500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3279 1 episodes - episode_reward: 171.723 [171.723, 171.723] - loss: 48.598 - mae: 50.157 - mean_q: 42.967 Interval 1799 (899000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3478 3 episodes - episode_reward: -208.137 [-464.345, 185.163] - loss: 48.122 - mae: 49.715 - mean_q: 42.815 Interval 1800 (899500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1808 1 episodes - episode_reward: -27.816 [-27.816, -27.816] - loss: 117.577 - mae: 50.045 - mean_q: 44.273 Interval 1801 (900000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3173 1 episodes - episode_reward: 85.759 [85.759, 85.759] - loss: 65.587 - mae: 50.067 - mean_q: 43.660 Interval 1802 (900500 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: 0.1373 1 episodes - episode_reward: -49.080 [-49.080, -49.080] - loss: 47.480 - mae: 51.110 - mean_q: 43.927 Interval 1803 (901000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.3601 1 episodes - episode_reward: 286.197 [286.197, 286.197] - loss: 40.101 - mae: 51.155 - mean_q: 45.283 Interval 1804 (901500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6823 3 episodes - episode_reward: -269.977 [-481.235, -147.656] - loss: 37.613 - mae: 51.604 - mean_q: 45.786 Interval 1805 (902000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1483 Interval 1806 (902500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1505 Interval 1807 (903000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1770 3 episodes - episode_reward: -168.164 [-515.687, 109.486] - loss: 49.811 - mae: 53.469 - mean_q: 47.665 Interval 1808 (903500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4532 1 episodes - episode_reward: -355.303 [-355.303, -355.303] - loss: 29.261 - mae: 52.720 - mean_q: 46.999 Interval 1809 (904000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9272 3 episodes - episode_reward: -312.569 [-571.527, -27.245] - loss: 29.396 - mae: 53.219 - mean_q: 47.861 Interval 1810 (904500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0229 Interval 1811 (905000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1771 1 episodes - episode_reward: 141.612 [141.612, 141.612] - loss: 27.210 - mae: 52.393 - mean_q: 47.431 Interval 1812 (905500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0576 Interval 1813 (906000 steps performed) 500/500 [==============================] - 2s 
5ms/step - reward: 0.2677 1 episodes - episode_reward: 99.517 [99.517, 99.517] - loss: 24.572 - mae: 52.285 - mean_q: 46.748 Interval 1814 (906500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6217 3 episodes - episode_reward: -116.200 [-171.207, -58.375] - loss: 20.641 - mae: 52.725 - mean_q: 46.967 Interval 1815 (907000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3660 1 episodes - episode_reward: -120.424 [-120.424, -120.424] - loss: 23.138 - mae: 53.060 - mean_q: 46.337 Interval 1816 (907500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0511 Interval 1817 (908000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7572 3 episodes - episode_reward: -207.822 [-496.396, 75.050] - loss: 24.678 - mae: 53.256 - mean_q: 47.169 Interval 1818 (908500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6894 2 episodes - episode_reward: -555.765 [-738.878, -372.653] - loss: 26.626 - mae: 52.971 - mean_q: 46.179 Interval 1819 (909000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3463 1 episodes - episode_reward: 142.642 [142.642, 142.642] - loss: 26.380 - mae: 52.792 - mean_q: 45.505 Interval 1820 (909500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9730 3 episodes - episode_reward: -161.023 [-313.920, -69.148] - loss: 23.574 - mae: 53.606 - mean_q: 45.972 Interval 1821 (910000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1139 1 episodes - episode_reward: 283.172 [283.172, 283.172] - loss: 23.620 - mae: 53.233 - mean_q: 44.377 Interval 1822 (910500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8183 3 episodes - episode_reward: -248.066 [-433.917, -113.259] - loss: 21.856 - mae: 53.130 - mean_q: 43.875 Interval 1823 (911000 steps performed) 
500/500 [==============================] - 2s 5ms/step - reward: 0.1445 Interval 1824 (911500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3275 1 episodes - episode_reward: 197.390 [197.390, 197.390] - loss: 18.990 - mae: 52.927 - mean_q: 45.716 Interval 1825 (912000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4296 3 episodes - episode_reward: -382.770 [-761.893, -21.343] - loss: 22.108 - mae: 51.703 - mean_q: 44.498 Interval 1826 (912500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6058 2 episodes - episode_reward: -160.041 [-164.311, -155.771] - loss: 20.765 - mae: 51.994 - mean_q: 44.113 Interval 1827 (913000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1001 Interval 1828 (913500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8062 3 episodes - episode_reward: -244.360 [-371.722, -152.067] - loss: 17.708 - mae: 50.472 - mean_q: 43.009 Interval 1829 (914000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9924 2 episodes - episode_reward: -606.303 [-793.269, -419.336] - loss: 17.776 - mae: 50.454 - mean_q: 41.314 Interval 1830 (914500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1684 Interval 1831 (915000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0401 Interval 1832 (915500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2138 1 episodes - episode_reward: 181.649 [181.649, 181.649] - loss: 19.678 - mae: 50.044 - mean_q: 42.206 Interval 1833 (916000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2046 Interval 1834 (916500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1644 2 episodes - episode_reward: -490.709 [-1183.931, 202.512] - loss: 19.186 - 
mae: 49.353 - mean_q: 44.114 Interval 1835 (917000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4738 1 episodes - episode_reward: -730.754 [-730.754, -730.754] - loss: 16.279 - mae: 48.884 - mean_q: 43.905 Interval 1836 (917500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1149 Interval 1837 (918000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3840 2 episodes - episode_reward: -401.748 [-548.144, -255.351] - loss: 16.650 - mae: 48.913 - mean_q: 44.109 Interval 1838 (918500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9940 3 episodes - episode_reward: -165.472 [-238.451, -108.870] - loss: 18.444 - mae: 48.978 - mean_q: 44.071 Interval 1839 (919000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1279 Interval 1840 (919500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2175 Interval 1841 (920000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7023 1 episodes - episode_reward: -546.821 [-546.821, -546.821] - loss: 17.280 - mae: 49.292 - mean_q: 44.337 Interval 1842 (920500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4912 2 episodes - episode_reward: -41.686 [-269.118, 185.747] - loss: 14.112 - mae: 49.431 - mean_q: 44.180 Interval 1843 (921000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1161 1 episodes - episode_reward: -743.316 [-743.316, -743.316] - loss: 17.481 - mae: 49.142 - mean_q: 43.511 Interval 1844 (921500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1185 Interval 1845 (922000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1878 2 episodes - episode_reward: 109.508 [-23.376, 242.393] - loss: 17.361 - mae: 48.137 - mean_q: 43.577 Interval 
1846 (922500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3386 2 episodes - episode_reward: -75.988 [-366.586, 214.610] - loss: 15.543 - mae: 48.073 - mean_q: 42.918 Interval 1847 (923000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8189 3 episodes - episode_reward: -147.560 [-199.857, -104.240] - loss: 14.396 - mae: 47.854 - mean_q: 40.452 Interval 1848 (923500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3829 2 episodes - episode_reward: -215.908 [-332.573, -99.243] - loss: 16.319 - mae: 47.942 - mean_q: 40.483 Interval 1849 (924000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9761 2 episodes - episode_reward: -885.188 [-939.767, -830.609] - loss: 17.045 - mae: 47.859 - mean_q: 40.315 Interval 1850 (924500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5060 2 episodes - episode_reward: -608.047 [-889.677, -326.418] - loss: 19.208 - mae: 47.761 - mean_q: 38.734 Interval 1851 (925000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4801 1 episodes - episode_reward: -264.302 [-264.302, -264.302] - loss: 17.105 - mae: 47.906 - mean_q: 38.334 Interval 1852 (925500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3203 2 episodes - episode_reward: -321.630 [-629.467, -13.792] - loss: 16.639 - mae: 46.774 - mean_q: 37.562 Interval 1853 (926000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0206 3 episodes - episode_reward: -129.354 [-430.872, 199.367] - loss: 17.810 - mae: 46.608 - mean_q: 37.173 Interval 1854 (926500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7733 3 episodes - episode_reward: -179.392 [-310.189, -57.341] - loss: 17.518 - mae: 46.537 - mean_q: 35.723 Interval 1855 (927000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -0.1713 1 episodes - episode_reward: -16.918 [-16.918, -16.918] - loss: 18.134 - mae: 46.567 - mean_q: 35.233 Interval 1856 (927500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2164 2 episodes - episode_reward: -292.387 [-713.949, 129.175] - loss: 18.248 - mae: 47.156 - mean_q: 35.342 Interval 1857 (928000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2673 Interval 1858 (928500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3446 1 episodes - episode_reward: 211.346 [211.346, 211.346] - loss: 17.669 - mae: 46.978 - mean_q: 33.307 Interval 1859 (929000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2038 1 episodes - episode_reward: 201.139 [201.139, 201.139] - loss: 18.567 - mae: 47.172 - mean_q: 32.378 Interval 1860 (929500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1692 Interval 1861 (930000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2119 5 episodes - episode_reward: -217.381 [-480.874, 181.991] - loss: 22.018 - mae: 46.652 - mean_q: 32.805 Interval 1862 (930500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0569 Interval 1863 (931000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9077 1 episodes - episode_reward: -371.085 [-371.085, -371.085] - loss: 20.121 - mae: 46.912 - mean_q: 30.940 Interval 1864 (931500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3197 4 episodes - episode_reward: -291.554 [-402.700, -116.151] - loss: 18.763 - mae: 46.798 - mean_q: 30.745 Interval 1865 (932000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1281 Interval 1866 (932500 steps performed) 500/500 [==============================] - 2s 4ms/step - 
reward: -0.4928 3 episodes - episode_reward: -87.089 [-296.263, 205.239] - loss: 18.944 - mae: 45.686 - mean_q: 28.976 Interval 1867 (933000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8523 2 episodes - episode_reward: -190.183 [-234.075, -146.291] - loss: 18.792 - mae: 46.378 - mean_q: 28.452 Interval 1868 (933500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0760 2 episodes - episode_reward: -254.895 [-358.414, -151.375] - loss: 21.141 - mae: 46.201 - mean_q: 27.715 Interval 1869 (934000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2419 1 episodes - episode_reward: -155.141 [-155.141, -155.141] - loss: 20.088 - mae: 46.458 - mean_q: 26.859 Interval 1870 (934500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4134 2 episodes - episode_reward: -108.260 [-167.336, -49.183] - loss: 18.229 - mae: 46.360 - mean_q: 26.443 Interval 1871 (935000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9200 3 episodes - episode_reward: -144.892 [-188.494, -109.097] - loss: 16.783 - mae: 46.765 - mean_q: 26.002 Interval 1872 (935500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0796 1 episodes - episode_reward: -83.994 [-83.994, -83.994] - loss: 17.219 - mae: 46.608 - mean_q: 26.182 Interval 1873 (936000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2353 3 episodes - episode_reward: -156.545 [-177.981, -144.522] - loss: 19.567 - mae: 46.430 - mean_q: 26.460 Interval 1874 (936500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1437 1 episodes - episode_reward: -112.780 [-112.780, -112.780] - loss: 17.477 - mae: 46.762 - mean_q: 26.524 Interval 1875 (937000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4882 5 episodes - episode_reward: -242.392 
[-561.565, -56.310] - loss: 19.838 - mae: 47.000 - mean_q: 25.481 Interval 1876 (937500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9727 2 episodes - episode_reward: -242.771 [-423.283, -62.259] - loss: 19.703 - mae: 47.578 - mean_q: 25.009 Interval 1877 (938000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6202 2 episodes - episode_reward: -160.061 [-182.354, -137.767] - loss: 18.475 - mae: 48.045 - mean_q: 23.974 Interval 1878 (938500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5271 3 episodes - episode_reward: -63.876 [-238.596, 107.409] - loss: 18.089 - mae: 48.013 - mean_q: 24.329 Interval 1879 (939000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7343 5 episodes - episode_reward: -183.803 [-303.022, -45.417] - loss: 21.018 - mae: 47.309 - mean_q: 24.557 Interval 1880 (939500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8923 3 episodes - episode_reward: -309.144 [-680.685, -106.238] - loss: 20.430 - mae: 46.893 - mean_q: 24.347 Interval 1881 (940000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6278 2 episodes - episode_reward: -208.287 [-213.716, -202.859] - loss: 17.719 - mae: 47.292 - mean_q: 24.166 Interval 1882 (940500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1159 3 episodes - episode_reward: -165.025 [-198.701, -114.710] - loss: 22.284 - mae: 47.069 - mean_q: 23.018 Interval 1883 (941000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0266 Interval 1884 (941500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1119 1 episodes - episode_reward: -66.993 [-66.993, -66.993] - loss: 21.694 - mae: 46.537 - mean_q: 23.808 Interval 1885 (942000 steps performed) 500/500 [==============================] - 3s 5ms/step 
- reward: -0.1248 Interval 1886 (942500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3349 1 episodes - episode_reward: -205.619 [-205.619, -205.619] - loss: 20.705 - mae: 46.278 - mean_q: 22.915 Interval 1887 (943000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2309 Interval 1888 (943500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0659 Interval 1889 (944000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3286 4 episodes - episode_reward: -216.308 [-295.536, -100.000] - loss: 17.305 - mae: 46.000 - mean_q: 24.860 Interval 1890 (944500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2016 1 episodes - episode_reward: -131.428 [-131.428, -131.428] - loss: 18.883 - mae: 46.439 - mean_q: 24.135 Interval 1891 (945000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0941 Interval 1892 (945500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4538 1 episodes - episode_reward: -178.713 [-178.713, -178.713] - loss: 18.483 - mae: 46.406 - mean_q: 24.501 Interval 1893 (946000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0952 3 episodes - episode_reward: -184.655 [-298.643, -100.000] - loss: 20.215 - mae: 46.353 - mean_q: 24.451 Interval 1894 (946500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0527 Interval 1895 (947000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2758 Interval 1896 (947500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1575 Interval 1897 (948000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8520 6 episodes - episode_reward: -272.591 [-459.083, -118.212] - loss: 19.286 - mae: 45.057 - mean_q: 23.472 Interval 
1898 (948500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0624 Interval 1899 (949000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6477 2 episodes - episode_reward: -150.880 [-180.225, -121.535] - loss: 20.322 - mae: 44.688 - mean_q: 22.109 Interval 1900 (949500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0407 Interval 1901 (950000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2100 Interval 1902 (950500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0452 Interval 1903 (951000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1455 2 episodes - episode_reward: -22.004 [-28.005, -16.002] - loss: 19.738 - mae: 44.685 - mean_q: 20.634 Interval 1904 (951500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0329 Interval 1905 (952000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1965 4 episodes - episode_reward: -144.570 [-228.362, -100.000] - loss: 20.348 - mae: 44.314 - mean_q: 21.947 Interval 1906 (952500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3814 1 episodes - episode_reward: -229.569 [-229.569, -229.569] - loss: 18.142 - mae: 44.692 - mean_q: 22.101 Interval 1907 (953000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2183 2 episodes - episode_reward: -73.280 [-117.849, -28.711] - loss: 17.139 - mae: 44.843 - mean_q: 23.536 Interval 1908 (953500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3056 1 episodes - episode_reward: -115.265 [-115.265, -115.265] - loss: 16.412 - mae: 45.344 - mean_q: 22.197 Interval 1909 (954000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5021 3 episodes - episode_reward: -88.479 
[-331.023, 183.309] - loss: 17.808 - mae: 44.710 - mean_q: 23.342 Interval 1910 (954500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0574 Interval 1911 (955000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0669 3 episodes - episode_reward: -191.282 [-347.616, -108.776] - loss: 17.188 - mae: 44.922 - mean_q: 22.365 Interval 1912 (955500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4575 5 episodes - episode_reward: -206.329 [-429.847, -92.341] - loss: 17.302 - mae: 44.972 - mean_q: 21.322 Interval 1913 (956000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2668 2 episodes - episode_reward: -177.577 [-247.317, -107.838] - loss: 17.482 - mae: 45.445 - mean_q: 22.145 Interval 1914 (956500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1112 Interval 1915 (957000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2501 Interval 1916 (957500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4727 3 episodes - episode_reward: -196.287 [-213.741, -178.836] - loss: 22.389 - mae: 44.502 - mean_q: 21.834 Interval 1917 (958000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1648 1 episodes - episode_reward: -390.972 [-390.972, -390.972] - loss: 18.442 - mae: 44.527 - mean_q: 21.618 Interval 1918 (958500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0674 2 episodes - episode_reward: -5.487 [-206.302, 195.329] - loss: 16.606 - mae: 44.583 - mean_q: 20.749 Interval 1919 (959000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6948 3 episodes - episode_reward: -100.125 [-229.141, 149.787] - loss: 15.702 - mae: 43.829 - mean_q: 21.606 Interval 1920 (959500 steps performed) 500/500 [==============================] - 
2s 4ms/step - reward: -0.5408 1 episodes - episode_reward: -347.228 [-347.228, -347.228] - loss: 15.683 - mae: 44.043 - mean_q: 20.805 Interval 1921 (960000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3947 1 episodes - episode_reward: -116.520 [-116.520, -116.520] - loss: 15.614 - mae: 43.756 - mean_q: 20.339 Interval 1922 (960500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9901 2 episodes - episode_reward: -304.218 [-442.826, -165.609] - loss: 18.677 - mae: 43.626 - mean_q: 20.705 Interval 1923 (961000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3190 Interval 1924 (961500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0518 Interval 1925 (962000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3022 Interval 1926 (962500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3809 1 episodes - episode_reward: -507.571 [-507.571, -507.571] - loss: 16.372 - mae: 42.472 - mean_q: 22.812 Interval 1927 (963000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0933 Interval 1928 (963500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6849 3 episodes - episode_reward: -445.727 [-820.268, -179.468] - loss: 17.072 - mae: 42.684 - mean_q: 22.030 Interval 1929 (964000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2086 Interval 1930 (964500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2123 2 episodes - episode_reward: -347.046 [-452.504, -241.588] - loss: 14.750 - mae: 41.971 - mean_q: 22.768 Interval 1931 (965000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0754 Interval 1932 (965500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 
-0.1095 Interval 1933 (966000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3470 Interval 1934 (966500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2670 6 episodes - episode_reward: -260.410 [-978.959, -33.182] - loss: 15.723 - mae: 41.943 - mean_q: 19.887 Interval 1935 (967000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4623 5 episodes - episode_reward: -406.614 [-844.712, -139.240] - loss: 15.807 - mae: 42.318 - mean_q: 19.243 Interval 1936 (967500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4651 3 episodes - episode_reward: -78.093 [-262.201, 206.033] - loss: 17.572 - mae: 42.346 - mean_q: 19.976 Interval 1937 (968000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2959 1 episodes - episode_reward: -140.915 [-140.915, -140.915] - loss: 17.215 - mae: 42.135 - mean_q: 19.594 Interval 1938 (968500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1772 Interval 1939 (969000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6210 2 episodes - episode_reward: -450.227 [-788.407, -112.047] - loss: 14.590 - mae: 42.237 - mean_q: 21.076 Interval 1940 (969500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6591 1 episodes - episode_reward: -355.843 [-355.843, -355.843] - loss: 18.126 - mae: 42.242 - mean_q: 20.384 Interval 1941 (970000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1632 Interval 1942 (970500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1823 Interval 1943 (971000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1424 Interval 1944 (971500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0203 1 episodes - 
episode_reward: -156.083 [-156.083, -156.083] - loss: 14.762 - mae: 41.660 - mean_q: 21.508 Interval 1945 (972000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4614 1 episodes - episode_reward: -317.212 [-317.212, -317.212] - loss: 16.997 - mae: 41.959 - mean_q: 21.719 Interval 1946 (972500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0054 Interval 1947 (973000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0896 Interval 1948 (973500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9200 6 episodes - episode_reward: -81.044 [-165.817, 98.418] - loss: 19.259 - mae: 41.443 - mean_q: 22.209 Interval 1949 (974000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6318 2 episodes - episode_reward: -138.503 [-191.352, -85.655] - loss: 14.419 - mae: 41.012 - mean_q: 24.193 Interval 1950 (974500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1652 4 episodes - episode_reward: -147.008 [-212.721, -104.271] - loss: 13.223 - mae: 40.807 - mean_q: 23.891 Interval 1951 (975000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1810 8 episodes - episode_reward: -198.196 [-316.525, -100.000] - loss: 16.741 - mae: 40.762 - mean_q: 23.301 Interval 1952 (975500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2650 4 episodes - episode_reward: -184.557 [-230.950, -144.077] - loss: 16.367 - mae: 40.709 - mean_q: 22.416 Interval 1953 (976000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3740 5 episodes - episode_reward: -122.023 [-339.074, 177.778] - loss: 19.494 - mae: 40.572 - mean_q: 22.179 Interval 1954 (976500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0101 Interval 1955 (977000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -0.6541 1 episodes - episode_reward: -395.937 [-395.937, -395.937] - loss: 20.083 - mae: 40.311 - mean_q: 22.111 Interval 1956 (977500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3510 3 episodes - episode_reward: -221.829 [-355.014, -145.907] - loss: 22.482 - mae: 40.356 - mean_q: 22.503 Interval 1957 (978000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0701 1 episodes - episode_reward: 139.101 [139.101, 139.101] - loss: 21.519 - mae: 40.838 - mean_q: 23.794 Interval 1958 (978500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9630 1 episodes - episode_reward: -668.710 [-668.710, -668.710] - loss: 21.944 - mae: 41.231 - mean_q: 24.485 Interval 1959 (979000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2261 2 episodes - episode_reward: 7.175 [-212.357, 226.707] - loss: 19.507 - mae: 41.887 - mean_q: 23.807 Interval 1960 (979500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2637 1 episodes - episode_reward: -67.361 [-67.361, -67.361] - loss: 21.328 - mae: 41.891 - mean_q: 24.209 Interval 1961 (980000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0077 1 episodes - episode_reward: -168.167 [-168.167, -168.167] - loss: 20.133 - mae: 42.276 - mean_q: 25.268 Interval 1962 (980500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4260 1 episodes - episode_reward: 207.912 [207.912, 207.912] - loss: 18.582 - mae: 42.128 - mean_q: 27.773 Interval 1963 (981000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2978 1 episodes - episode_reward: 250.521 [250.521, 250.521] - loss: 19.227 - mae: 42.978 - mean_q: 27.612 Interval 1964 (981500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0295 2 
episodes - episode_reward: -5.364 [-163.732, 153.004] - loss: 23.207 - mae: 43.118 - mean_q: 29.867 Interval 1965 (982000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9746 3 episodes - episode_reward: -152.227 [-174.255, -115.185] - loss: 21.317 - mae: 44.001 - mean_q: 30.791 Interval 1966 (982500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1374 Interval 1967 (983000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.2639 3 episodes - episode_reward: 65.655 [-108.595, 173.696] - loss: 21.276 - mae: 45.523 - mean_q: 32.951 Interval 1968 (983500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0551 Interval 1969 (984000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0092 Interval 1970 (984500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2968 1 episodes - episode_reward: 142.101 [142.101, 142.101] - loss: 23.757 - mae: 47.408 - mean_q: 36.484 Interval 1971 (985000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5717 2 episodes - episode_reward: -122.159 [-193.360, -50.958] - loss: 17.887 - mae: 48.141 - mean_q: 37.884 Interval 1972 (985500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3069 Interval 1973 (986000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1901 2 episodes - episode_reward: -379.558 [-400.775, -358.342] - loss: 22.599 - mae: 49.636 - mean_q: 41.030 Interval 1974 (986500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8594 1 episodes - episode_reward: -324.867 [-324.867, -324.867] - loss: 24.145 - mae: 50.186 - mean_q: 41.937 Interval 1975 (987000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3725 1 episodes - episode_reward: -263.200 [-263.200, 
-263.200] - loss: 20.145 - mae: 50.621 - mean_q: 41.650 Interval 1976 (987500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0249 Interval 1977 (988000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1576 Interval 1978 (988500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3438 3 episodes - episode_reward: -94.647 [-172.677, 21.304] - loss: 16.997 - mae: 51.658 - mean_q: 44.500 Interval 1979 (989000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4563 2 episodes - episode_reward: -113.308 [-190.758, -35.858] - loss: 16.166 - mae: 52.397 - mean_q: 44.932 Interval 1980 (989500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0396 Interval 1981 (990000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0434 Interval 1982 (990500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3279 1 episodes - episode_reward: 152.987 [152.987, 152.987] - loss: 20.399 - mae: 53.066 - mean_q: 46.903 Interval 1983 (991000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0907 Interval 1984 (991500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5204 3 episodes - episode_reward: -86.201 [-193.786, 118.462] - loss: 21.886 - mae: 52.742 - mean_q: 46.328 Interval 1985 (992000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4696 2 episodes - episode_reward: -133.710 [-175.240, -92.180] - loss: 18.958 - mae: 52.962 - mean_q: 45.671 Interval 1986 (992500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1257 Interval 1987 (993000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8740 3 episodes - episode_reward: -321.044 [-458.022, -168.643] - loss: 19.994 - mae: 53.485 
- mean_q: 45.567 Interval 1988 (993500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0209 Interval 1989 (994000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0000 3 episodes - episode_reward: -336.371 [-589.318, -160.794] - loss: 20.977 - mae: 53.384 - mean_q: 45.272 Interval 1990 (994500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7510 1 episodes - episode_reward: -338.035 [-338.035, -338.035] - loss: 23.395 - mae: 53.703 - mean_q: 44.866 Interval 1991 (995000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4192 7 episodes - episode_reward: -316.143 [-576.821, -101.278] - loss: 22.277 - mae: 53.808 - mean_q: 43.923 Interval 1992 (995500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7204 8 episodes - episode_reward: -235.641 [-391.062, -115.323] - loss: 22.641 - mae: 54.369 - mean_q: 42.949 Interval 1993 (996000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6598 6 episodes - episode_reward: -189.481 [-363.305, 22.512] - loss: 21.062 - mae: 55.070 - mean_q: 43.260 Interval 1994 (996500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5871 10 episodes - episode_reward: -249.115 [-383.448, -122.453] - loss: 28.002 - mae: 54.848 - mean_q: 43.705 Interval 1995 (997000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7800 6 episodes - episode_reward: -281.020 [-528.911, -76.418] - loss: 21.939 - mae: 54.975 - mean_q: 42.624 Interval 1996 (997500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.1311 10 episodes - episode_reward: -323.455 [-476.873, -124.532] - loss: 24.044 - mae: 54.834 - mean_q: 40.989 Interval 1997 (998000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9007 6 episodes - 
episode_reward: -241.692 [-537.419, -25.582] - loss: 21.337 - mae: 54.346 - mean_q: 40.142 Interval 1998 (998500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3477 5 episodes - episode_reward: -150.897 [-198.801, -126.663] - loss: 26.647 - mae: 55.432 - mean_q: 40.744 Interval 1999 (999000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0574 7 episodes - episode_reward: -139.806 [-179.645, -67.892] - loss: 28.151 - mae: 55.748 - mean_q: 40.932 Interval 2000 (999500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4920 2 episodes - episode_reward: -124.220 [-150.166, -98.274] - loss: 28.921 - mae: 55.498 - mean_q: 40.600 Interval 2001 (1000000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6667 5 episodes - episode_reward: -160.435 [-217.710, -55.613] - loss: 30.307 - mae: 55.645 - mean_q: 40.134 Interval 2002 (1000500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3272 4 episodes - episode_reward: -174.049 [-208.023, -131.682] - loss: 24.931 - mae: 55.745 - mean_q: 39.228 Interval 2003 (1001000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3510 1 episodes - episode_reward: -51.993 [-51.993, -51.993] - loss: 24.670 - mae: 55.224 - mean_q: 37.099 Interval 2004 (1001500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5582 1 episodes - episode_reward: -368.035 [-368.035, -368.035] - loss: 26.841 - mae: 55.336 - mean_q: 36.869 Interval 2005 (1002000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7525 1 episodes - episode_reward: -260.730 [-260.730, -260.730] - loss: 27.783 - mae: 55.356 - mean_q: 36.580 Interval 2006 (1002500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4171 4 episodes - episode_reward: -193.120 [-225.112, -172.346] - 
loss: 28.512 - mae: 55.301 - mean_q: 37.344 Interval 2007 (1003000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1853 1 episodes - episode_reward: -243.658 [-243.658, -243.658] - loss: 23.335 - mae: 55.307 - mean_q: 36.935 Interval 2008 (1003500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2740 2 episodes - episode_reward: -14.820 [-218.149, 188.510] - loss: 24.005 - mae: 55.381 - mean_q: 37.956 Interval 2009 (1004000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2358 2 episodes - episode_reward: -212.348 [-224.917, -199.779] - loss: 23.536 - mae: 56.062 - mean_q: 37.447 Interval 2010 (1004500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2261 5 episodes - episode_reward: -234.536 [-300.185, -195.487] - loss: 25.735 - mae: 56.091 - mean_q: 37.278 Interval 2011 (1005000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4878 3 episodes - episode_reward: -306.058 [-609.773, -152.386] - loss: 24.665 - mae: 56.354 - mean_q: 37.581 Interval 2012 (1005500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0522 Interval 2013 (1006000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5221 5 episodes - episode_reward: -149.093 [-204.641, -74.597] - loss: 25.039 - mae: 56.744 - mean_q: 37.912 Interval 2014 (1006500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1302 Interval 2015 (1007000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0298 Interval 2016 (1007500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7874 3 episodes - episode_reward: -144.975 [-194.749, -71.046] - loss: 26.014 - mae: 58.557 - mean_q: 41.008 Interval 2017 (1008000 steps performed) 500/500 [==============================] - 3s 7ms/step 
- reward: -0.4419 2 episodes - episode_reward: -162.776 [-225.551, -100.000] - loss: 27.176 - mae: 59.651 - mean_q: 40.965 Interval 2018 (1008500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3358 2 episodes - episode_reward: 20.764 [-164.774, 206.302] - loss: 29.102 - mae: 60.138 - mean_q: 41.231 Interval 2019 (1009000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1594 1 episodes - episode_reward: -219.818 [-219.818, -219.818] - loss: 26.191 - mae: 60.697 - mean_q: 41.973 Interval 2020 (1009500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2767 2 episodes - episode_reward: -80.148 [-306.590, 146.294] - loss: 26.965 - mae: 61.058 - mean_q: 42.945 Interval 2021 (1010000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3125 4 episodes - episode_reward: -148.180 [-245.592, 1.309] - loss: 28.941 - mae: 61.775 - mean_q: 43.889 Interval 2022 (1010500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6685 2 episodes - episode_reward: -233.921 [-236.441, -231.401] - loss: 26.151 - mae: 62.612 - mean_q: 45.066 Interval 2023 (1011000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7949 3 episodes - episode_reward: -104.381 [-172.691, 25.136] - loss: 28.485 - mae: 63.269 - mean_q: 44.647 Interval 2024 (1011500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1377 1 episodes - episode_reward: -85.014 [-85.014, -85.014] - loss: 30.654 - mae: 64.730 - mean_q: 46.387 Interval 2025 (1012000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8329 3 episodes - episode_reward: -125.196 [-165.947, -47.616] - loss: 32.617 - mae: 66.271 - mean_q: 47.779 Interval 2026 (1012500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2753 4 episodes - episode_reward: 
-149.085 [-192.998, -52.684] - loss: 36.222 - mae: 66.801 - mean_q: 48.868 Interval 2027 (1013000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9504 5 episodes - episode_reward: -194.025 [-268.618, -100.000] - loss: 36.029 - mae: 67.862 - mean_q: 49.570 Interval 2028 (1013500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8646 9 episodes - episode_reward: -162.532 [-241.939, -25.335] - loss: 35.137 - mae: 68.850 - mean_q: 49.495 Interval 2029 (1014000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0038 3 episodes - episode_reward: -171.463 [-233.030, -129.173] - loss: 37.708 - mae: 69.645 - mean_q: 50.495 Interval 2030 (1014500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0320 Interval 2031 (1015000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0773 Interval 2032 (1015500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2829 2 episodes - episode_reward: -44.211 [-233.422, 145.000] - loss: 38.096 - mae: 73.689 - mean_q: 55.325 Interval 2033 (1016000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4395 4 episodes - episode_reward: -181.742 [-210.642, -152.763] - loss: 39.047 - mae: 75.432 - mean_q: 57.701 Interval 2034 (1016500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2258 4 episodes - episode_reward: -156.793 [-369.055, -54.814] - loss: 42.026 - mae: 77.362 - mean_q: 60.641 Interval 2035 (1017000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1541 3 episodes - episode_reward: -185.390 [-309.552, -121.876] - loss: 44.566 - mae: 79.814 - mean_q: 65.760 Interval 2036 (1017500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2620 4 episodes - episode_reward: -268.445 [-514.415, -104.111] - 
loss: 69.805 - mae: 82.862 - mean_q: 67.996 Interval 2037 (1018000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7705 3 episodes - episode_reward: -337.950 [-529.597, -192.403] - loss: 52.104 - mae: 85.686 - mean_q: 71.911 Interval 2038 (1018500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1054 4 episodes - episode_reward: -133.379 [-298.958, -33.569] - loss: 57.557 - mae: 88.023 - mean_q: 74.193 Interval 2039 (1019000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1888 5 episodes - episode_reward: -209.836 [-381.893, -25.995] - loss: 48.322 - mae: 90.285 - mean_q: 76.402 Interval 2040 (1019500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2706 2 episodes - episode_reward: -86.761 [-105.277, -68.245] - loss: 58.493 - mae: 93.964 - mean_q: 81.548 Interval 2041 (1020000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2396 2 episodes - episode_reward: -58.812 [-138.253, 20.630] - loss: 72.464 - mae: 98.219 - mean_q: 88.079 Interval 2042 (1020500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4760 3 episodes - episode_reward: -79.272 [-118.490, -41.134] - loss: 82.944 - mae: 103.682 - mean_q: 94.681 Interval 2043 (1021000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3964 4 episodes - episode_reward: -168.566 [-444.961, -50.477] - loss: 90.456 - mae: 108.834 - mean_q: 103.303 Interval 2044 (1021500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9293 1 episodes - episode_reward: -513.042 [-513.042, -513.042] - loss: 97.319 - mae: 116.544 - mean_q: 117.795 Interval 2045 (1022000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2414 3 episodes - episode_reward: -160.391 [-357.108, -58.075] - loss: 121.631 - mae: 124.310 - mean_q: 
132.459 Interval 2046 (1022500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6104 7 episodes - episode_reward: -124.944 [-186.998, -21.845] - loss: 125.862 - mae: 129.460 - mean_q: 138.937 Interval 2047 (1023000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0264 6 episodes - episode_reward: -170.786 [-271.289, -126.611] - loss: 108.018 - mae: 132.366 - mean_q: 141.072 Interval 2048 (1023500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2339 2 episodes - episode_reward: -81.411 [-151.698, -11.125] - loss: 100.039 - mae: 135.581 - mean_q: 144.301 Interval 2049 (1024000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5393 5 episodes - episode_reward: -136.373 [-209.493, -64.764] - loss: 108.752 - mae: 138.551 - mean_q: 147.543 Interval 2050 (1024500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8664 2 episodes - episode_reward: -236.184 [-394.907, -77.461] - loss: 134.840 - mae: 137.485 - mean_q: 145.712 Interval 2051 (1025000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1854 1 episodes - episode_reward: -149.488 [-149.488, -149.488] - loss: 139.528 - mae: 139.554 - mean_q: 147.825 Interval 2052 (1025500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3038 7 episodes - episode_reward: -148.421 [-303.480, -44.628] - loss: 112.143 - mae: 143.083 - mean_q: 153.747 Interval 2053 (1026000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4645 2 episodes - episode_reward: -156.946 [-230.375, -83.517] - loss: 120.898 - mae: 143.239 - mean_q: 153.184 Interval 2054 (1026500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0446 Interval 2055 (1027000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5782 2 
episodes - episode_reward: -12.855 [-174.771, 149.061] - loss: 118.152 - mae: 145.314 - mean_q: 155.823 Interval 2056 (1027500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0972 4 episodes - episode_reward: -195.373 [-444.640, -57.470] - loss: 97.851 - mae: 145.210 - mean_q: 154.293 Interval 2057 (1028000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4601 2 episodes - episode_reward: -123.451 [-160.112, -86.790] - loss: 130.680 - mae: 145.691 - mean_q: 154.884 Interval 2058 (1028500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1088 Interval 2059 (1029000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1017 Interval 2060 (1029500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1509 Interval 2061 (1030000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0715 Interval 2062 (1030500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3247 2 episodes - episode_reward: -133.853 [-281.588, 13.883] - loss: 101.581 - mae: 151.378 - mean_q: 164.936 Interval 2063 (1031000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0759 Interval 2064 (1031500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0417 Interval 2065 (1032000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2056 1 episodes - episode_reward: 62.192 [62.192, 62.192] - loss: 109.011 - mae: 156.392 - mean_q: 172.759 Interval 2066 (1032500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2999 Interval 2067 (1033000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2411 Interval 2068 (1033500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1982 
Interval 2069 (1034000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0369 Interval 2070 (1034500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1880 Interval 2071 (1035000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1770 Interval 2072 (1035500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2367 Interval 2073 (1036000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0820 Interval 2074 (1036500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1828 Interval 2075 (1037000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.2205 Interval 2076 (1037500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1793 Interval 2077 (1038000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1593 Interval 2078 (1038500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2302 Interval 2079 (1039000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.1193 Interval 2080 (1039500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.2494 Interval 2081 (1040000 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.0916 Interval 2082 (1040500 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.1881 Interval 2083 (1041000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1980 Interval 2084 (1041500 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.2113 Interval 2085 (1042000 steps performed) 500/500 [==============================] - 9s 17ms/step - reward: -0.1112 Interval 2086 (1042500 steps performed) 500/500 
[==============================] - 9s 18ms/step - reward: -0.1703 Interval 2087 (1043000 steps performed) 500/500 [==============================] - 9s 19ms/step - reward: -0.2410 Interval 2088 (1043500 steps performed) 500/500 [==============================] - 10s 19ms/step - reward: -0.1916 Interval 2089 (1044000 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.1276 Interval 2090 (1044500 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.1374 Interval 2091 (1045000 steps performed) 500/500 [==============================] - 10s 21ms/step - reward: -0.2693 Interval 2092 (1045500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.6760 1 episodes - episode_reward: -2699.037 [-2699.037, -2699.037] - loss: 95.144 - mae: 189.820 - mean_q: 223.395 Interval 2093 (1046000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1790 Interval 2094 (1046500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9206 1 episodes - episode_reward: -501.984 [-501.984, -501.984] - loss: 92.978 - mae: 191.634 - mean_q: 227.980 Interval 2095 (1047000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0879 Interval 2096 (1047500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2451 Interval 2097 (1048000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0587 Interval 2098 (1048500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.1026 1 episodes - episode_reward: -1203.536 [-1203.536, -1203.536] - loss: 90.571 - mae: 191.742 - mean_q: 232.041 Interval 2099 (1049000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5364 1 episodes - episode_reward: -183.972 [-183.972, -183.972] - loss: 90.964 - mae: 190.737 - mean_q: 230.180 Interval 2100 
(1049500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2447 1 episodes - episode_reward: -1823.943 [-1823.943, -1823.943] - loss: 88.561 - mae: 188.977 - mean_q: 227.557 Interval 2101 (1050000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7487 1 episodes - episode_reward: -293.842 [-293.842, -293.842] - loss: 80.451 - mae: 187.928 - mean_q: 226.490 Interval 2102 (1050500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2040 Interval 2103 (1051000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0912 Interval 2104 (1051500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0016 2 episodes - episode_reward: -357.825 [-612.165, -103.485] - loss: 76.355 - mae: 182.145 - mean_q: 218.346 Interval 2105 (1052000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2316 Interval 2106 (1052500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1721 Interval 2107 (1053000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1748 3 episodes - episode_reward: -593.519 [-609.832, -571.553] - loss: 69.028 - mae: 178.037 - mean_q: 212.389 Interval 2108 (1053500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0578 Interval 2109 (1054000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0773 Interval 2110 (1054500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1422 Interval 2111 (1055000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1247 Interval 2112 (1055500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2126 Interval 2113 (1056000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0738 
Interval 2114 (1056500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1614 Interval 2115 (1057000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: 0.1663 1 episodes - episode_reward: -287.545 [-287.545, -287.545] - loss: 61.552 - mae: 167.267 - mean_q: 199.962 Interval 2116 (1057500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1007 Interval 2117 (1058000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4805 3 episodes - episode_reward: -423.120 [-505.892, -354.000] - loss: 62.326 - mae: 165.453 - mean_q: 197.833 Interval 2118 (1058500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1665 Interval 2119 (1059000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2249 Interval 2120 (1059500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0558 Interval 2121 (1060000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6350 2 episodes - episode_reward: -191.311 [-420.198, 37.577] - loss: 63.898 - mae: 159.376 - mean_q: 189.053 Interval 2122 (1060500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5737 Interval 2123 (1061000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3574 1 episodes - episode_reward: -834.735 [-834.735, -834.735] - loss: 58.932 - mae: 160.206 - mean_q: 190.754 Interval 2124 (1061500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5975 2 episodes - episode_reward: -714.283 [-1006.143, -422.423] - loss: 53.615 - mae: 160.130 - mean_q: 190.155 Interval 2125 (1062000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2587 Interval 2126 (1062500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
-1.4928 Interval 2127 (1063000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7730 1 episodes - episode_reward: -1215.015 [-1215.015, -1215.015] - loss: 56.924 - mae: 157.589 - mean_q: 186.338 Interval 2128 (1063500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1994 Interval 2129 (1064000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2013 Interval 2130 (1064500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1601 Interval 2131 (1065000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1797 Interval 2132 (1065500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1695 Interval 2133 (1066000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1907 Interval 2134 (1066500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2081 Interval 2135 (1067000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1515 Interval 2136 (1067500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.8844 1 episodes - episode_reward: -1224.642 [-1224.642, -1224.642] - loss: 37.863 - mae: 154.928 - mean_q: 186.815 Interval 2137 (1068000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4027 Interval 2138 (1068500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6751 2 episodes - episode_reward: -485.364 [-579.761, -390.967] - loss: 39.525 - mae: 153.041 - mean_q: 185.108 Interval 2139 (1069000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7525 2 episodes - episode_reward: -444.598 [-453.506, -435.690] - loss: 38.430 - mae: 152.220 - mean_q: 183.857 Interval 2140 (1069500 steps performed) 500/500 [==============================] - 2s 
5ms/step - reward: -0.1632 Interval 2141 (1070000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1671 Interval 2142 (1070500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1672 Interval 2143 (1071000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2254 Interval 2144 (1071500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1598 Interval 2145 (1072000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1303 Interval 2146 (1072500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1873 Interval 2147 (1073000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1493 Interval 2148 (1073500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1632 Interval 2149 (1074000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2682 Interval 2150 (1074500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1436 Interval 2151 (1075000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1469 Interval 2152 (1075500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0940 Interval 2153 (1076000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1934 Interval 2154 (1076500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.8410 1 episodes - episode_reward: -1599.160 [-1599.160, -1599.160] - loss: 23.994 - mae: 136.137 - mean_q: 171.661 Interval 2155 (1077000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1491 Interval 2156 (1077500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1218 Interval 2157 (1078000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.2921 Interval 2158 (1078500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5730 2 episodes - episode_reward: -563.939 [-739.956, -387.922] - loss: 22.377 - mae: 130.712 - mean_q: 164.881 Interval 2159 (1079000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5661 Interval 2160 (1079500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4576 3 episodes - episode_reward: -483.096 [-902.612, -209.440] - loss: 20.430 - mae: 126.753 - mean_q: 157.417 Interval 2161 (1080000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8647 1 episodes - episode_reward: -1957.958 [-1957.958, -1957.958] - loss: 18.515 - mae: 124.596 - mean_q: 153.207 Interval 2162 (1080500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7781 1 episodes - episode_reward: -1942.719 [-1942.719, -1942.719] - loss: 17.707 - mae: 123.457 - mean_q: 150.109 Interval 2163 (1081000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8301 4 episodes - episode_reward: -217.039 [-391.316, -129.672] - loss: 21.761 - mae: 121.942 - mean_q: 146.838 Interval 2164 (1081500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5159 4 episodes - episode_reward: -210.796 [-284.028, -73.124] - loss: 20.005 - mae: 120.835 - mean_q: 144.336 Interval 2165 (1082000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6260 3 episodes - episode_reward: -249.124 [-440.838, -79.166] - loss: 27.368 - mae: 120.014 - mean_q: 141.309 Interval 2166 (1082500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3383 3 episodes - episode_reward: -548.095 [-1424.926, -87.989] - loss: 20.754 - mae: 119.078 - mean_q: 140.112 Interval 2167 (1083000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.7351 2 episodes - episode_reward: -420.101 [-693.400, -146.802] - loss: 17.603 - mae: 118.297 - mean_q: 135.919 Interval 2168 (1083500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5274 3 episodes - episode_reward: -770.800 [-1503.567, -249.576] - loss: 18.382 - mae: 117.390 - mean_q: 133.447 Interval 2169 (1084000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0759 3 episodes - episode_reward: -183.624 [-205.123, -155.396] - loss: 19.859 - mae: 117.059 - mean_q: 130.128 Interval 2170 (1084500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6294 1 episodes - episode_reward: -263.851 [-263.851, -263.851] - loss: 33.110 - mae: 115.292 - mean_q: 128.417 Interval 2171 (1085000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7326 3 episodes - episode_reward: -300.916 [-437.715, -201.170] - loss: 23.345 - mae: 114.007 - mean_q: 125.109 Interval 2172 (1085500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5274 1 episodes - episode_reward: -740.408 [-740.408, -740.408] - loss: 26.060 - mae: 112.356 - mean_q: 121.963 Interval 2173 (1086000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5524 1 episodes - episode_reward: -263.040 [-263.040, -263.040] - loss: 24.824 - mae: 111.244 - mean_q: 121.015 Interval 2174 (1086500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4395 1 episodes - episode_reward: -142.853 [-142.853, -142.853] - loss: 20.776 - mae: 109.987 - mean_q: 118.985 Interval 2175 (1087000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4994 1 episodes - episode_reward: -338.757 [-338.757, -338.757] - loss: 31.585 - mae: 108.740 - mean_q: 117.963 Interval 2176 (1087500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.1645 2 episodes - episode_reward: -268.562 [-305.706, -231.417] - loss: 25.829 - mae: 108.204 - mean_q: 114.624 Interval 2177 (1088000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1385 1 episodes - episode_reward: -613.436 [-613.436, -613.436] - loss: 24.502 - mae: 107.401 - mean_q: 113.161 Interval 2178 (1088500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0908 Interval 2179 (1089000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1373 Interval 2180 (1089500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2870 Interval 2181 (1090000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0644 Interval 2182 (1090500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1605 Interval 2183 (1091000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.3406 Interval 2184 (1091500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6947 3 episodes - episode_reward: -479.683 [-845.863, -283.176] - loss: 30.411 - mae: 122.708 - mean_q: 132.454 Interval 2185 (1092000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2057 Interval 2186 (1092500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1355 Interval 2187 (1093000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0050 Interval 2188 (1093500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3239 Interval 2189 (1094000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2119 Interval 2190 (1094500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2037 Interval 2191 (1095000 steps 
performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1270 Interval 2192 (1095500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2234 Interval 2193 (1096000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1653 Interval 2194 (1096500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.1995 Interval 2195 (1097000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.2033 Interval 2196 (1097500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1515 Interval 2197 (1098000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.2195 Interval 2198 (1098500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.2291 Interval 2199 (1099000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1791 Interval 2200 (1099500 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.2222 Interval 2201 (1100000 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1848 Interval 2202 (1100500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -1.4168 2 episodes - episode_reward: -1131.535 [-2011.985, -251.085] - loss: 135.308 - mae: 348.941 - mean_q: 443.039 Interval 2203 (1101000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1932 Interval 2204 (1101500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2406 Interval 2205 (1102000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7515 1 episodes - episode_reward: -503.279 [-503.279, -503.279] - loss: 123.860 - mae: 362.967 - mean_q: 461.387 Interval 2206 (1102500 steps performed) 500/500 [==============================] - 2s 4ms/step - 
reward: -0.7430 2 episodes - episode_reward: -206.921 [-224.330, -189.513] - loss: 116.291 - mae: 366.810 - mean_q: 467.384 Interval 2207 (1103000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3812 1 episodes - episode_reward: -277.483 [-277.483, -277.483] - loss: 112.839 - mae: 373.737 - mean_q: 479.087 Interval 2208 (1103500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3633 2 episodes - episode_reward: -292.420 [-295.552, -289.288] - loss: 121.787 - mae: 379.657 - mean_q: 486.412 Interval 2209 (1104000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0473 Interval 2210 (1104500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2280 Interval 2211 (1105000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1551 Interval 2212 (1105500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1858 Interval 2213 (1106000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1966 Interval 2214 (1106500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1505 Interval 2215 (1107000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1886 Interval 2216 (1107500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1997 Interval 2217 (1108000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1812 Interval 2218 (1108500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1672 Interval 2219 (1109000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1428 Interval 2220 (1109500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1734 Interval 2221 (1110000 steps performed) 500/500 
[==============================] - 5s 11ms/step - reward: -0.2178 Interval 2222 (1110500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1976 Interval 2223 (1111000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2238 Interval 2224 (1111500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.0950 Interval 2225 (1112000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1657 Interval 2226 (1112500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2068 Interval 2227 (1113000 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1660 Interval 2228 (1113500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1819 Interval 2229 (1114000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.2966 Interval 2230 (1114500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1070 Interval 2231 (1115000 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.2023 Interval 2232 (1115500 steps performed) 500/500 [==============================] - 177s 355ms/step - reward: -0.0569 Interval 2233 (1116000 steps performed) 500/500 [==============================] - 28s 55ms/step - reward: -0.3750 Interval 2234 (1116500 steps performed) 500/500 [==============================] - 1767s 4s/step - reward: -0.3900 1 episodes - episode_reward: -2538.888 [-2538.888, -2538.888] - loss: 96.077 - mae: 393.166 - mean_q: 512.927 Interval 2235 (1117000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2901 Interval 2236 (1117500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1628 Interval 2237 (1118000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 
-1.1729 1 episodes - episode_reward: -690.876 [-690.876, -690.876] - loss: 111.298 - mae: 437.000 - mean_q: 572.575 Interval 2238 (1118500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0621 Interval 2239 (1119000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2427 Interval 2240 (1119500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1282 Interval 2241 (1120000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1813 Interval 2242 (1120500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1832 Interval 2243 (1121000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1774 2 episodes - episode_reward: -533.103 [-807.987, -258.220] - loss: 138.123 - mae: 427.337 - mean_q: 554.287 Interval 2244 (1121500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2245 Interval 2245 (1122000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1210 Interval 2246 (1122500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1785 Interval 2247 (1123000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1806 Interval 2248 (1123500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5390 3 episodes - episode_reward: -381.389 [-556.601, -50.567] - loss: 115.247 - mae: 381.310 - mean_q: 491.548 Interval 2249 (1124000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5173 3 episodes - episode_reward: -247.524 [-293.316, -179.201] - loss: 190.850 - mae: 371.499 - mean_q: 477.075 Interval 2250 (1124500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3442 1 episodes - episode_reward: -144.605 [-144.605, -144.605] - loss: 70.911 - mae: 364.275 - 
mean_q: 465.878 Interval 2251 (1125000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2320 3 episodes - episode_reward: -94.453 [-199.927, -15.928] - loss: 103.662 - mae: 356.087 - mean_q: 455.140 Interval 2252 (1125500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1426 3 episodes - episode_reward: -611.796 [-911.506, -313.057] - loss: 165.334 - mae: 349.032 - mean_q: 443.517 Interval 2253 (1126000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1259 Interval 2254 (1126500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2550 1 episodes - episode_reward: -269.343 [-269.343, -269.343] - loss: 81.527 - mae: 334.026 - mean_q: 423.224 Interval 2255 (1127000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9883 4 episodes - episode_reward: -125.965 [-207.647, -48.904] - loss: 125.203 - mae: 325.605 - mean_q: 409.015 Interval 2256 (1127500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6441 2 episodes - episode_reward: -123.810 [-147.602, -100.018] - loss: 105.091 - mae: 316.709 - mean_q: 395.831 Interval 2257 (1128000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1650 Interval 2258 (1128500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1827 Interval 2259 (1129000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1534 Interval 2260 (1129500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1469 Interval 2261 (1130000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1800 Interval 2262 (1130500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1566 Interval 2263 (1131000 steps performed) 500/500 [==============================] - 5s 
10ms/step - reward: -0.1192 Interval 2264 (1131500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1160 Interval 2265 (1132000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2411 Interval 2266 (1132500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1829 Interval 2267 (1133000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1582 Interval 2268 (1133500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1794 Interval 2269 (1134000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3933 1 episodes - episode_reward: -1298.444 [-1298.444, -1298.444] - loss: 77.606 - mae: 277.539 - mean_q: 363.320 Interval 2270 (1134500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1308 Interval 2271 (1135000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1719 Interval 2272 (1135500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1774 Interval 2273 (1136000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1982 Interval 2274 (1136500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3348 Interval 2275 (1137000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2316 1 episodes - episode_reward: -571.570 [-571.570, -571.570] - loss: 82.097 - mae: 334.626 - mean_q: 441.644 Interval 2276 (1137500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1499 Interval 2277 (1138000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2934 Interval 2278 (1138500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3919 3 episodes - episode_reward: -297.773 [-405.815, 
-217.080] - loss: 69.097 - mae: 341.571 - mean_q: 452.695 Interval 2279 (1139000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3700 1 episodes - episode_reward: -155.116 [-155.116, -155.116] - loss: 100.427 - mae: 339.722 - mean_q: 451.389 Interval 2280 (1139500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0508 3 episodes - episode_reward: -179.365 [-214.688, -110.390] - loss: 105.513 - mae: 339.973 - mean_q: 452.138 Interval 2281 (1140000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4088 3 episodes - episode_reward: -215.379 [-241.278, -183.984] - loss: 116.815 - mae: 337.796 - mean_q: 449.286 Interval 2282 (1140500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3119 3 episodes - episode_reward: -233.416 [-241.998, -223.397] - loss: 107.854 - mae: 338.304 - mean_q: 450.282 Interval 2283 (1141000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3924 1 episodes - episode_reward: -220.636 [-220.636, -220.636] - loss: 123.948 - mae: 334.898 - mean_q: 445.744 Interval 2284 (1141500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9590 2 episodes - episode_reward: -208.726 [-234.513, -182.940] - loss: 92.452 - mae: 333.212 - mean_q: 442.754 Interval 2285 (1142000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3192 3 episodes - episode_reward: -239.122 [-244.500, -230.934] - loss: 100.108 - mae: 330.973 - mean_q: 439.798 Interval 2286 (1142500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9850 2 episodes - episode_reward: -212.165 [-223.356, -200.975] - loss: 110.998 - mae: 326.192 - mean_q: 432.807 Interval 2287 (1143000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0139 Interval 2288 (1143500 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -0.1372 Interval 2289 (1144000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1889 Interval 2290 (1144500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1970 Interval 2291 (1145000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1019 Interval 2292 (1145500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1715 Interval 2293 (1146000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1753 Interval 2294 (1146500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2046 Interval 2295 (1147000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1492 Interval 2296 (1147500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1650 Interval 2297 (1148000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2184 Interval 2298 (1148500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1720 Interval 2299 (1149000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1628 Interval 2300 (1149500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.3201 Interval 2301 (1150000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.0456 Interval 2302 (1150500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1504 Interval 2303 (1151000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2082 Interval 2304 (1151500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.3257 Interval 2305 (1152000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.0328 
Interval 2306 (1152500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1984 Interval 2307 (1153000 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.2114 Interval 2308 (1153500 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.1541 Interval 2309 (1154000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1981 Interval 2310 (1154500 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1453 Interval 2311 (1155000 steps performed) 500/500 [==============================] - 9s 19ms/step - reward: -0.2077 Interval 2312 (1155500 steps performed) 500/500 [==============================] - 9s 19ms/step - reward: -0.2762 Interval 2313 (1156000 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.0168 Interval 2314 (1156500 steps performed) 500/500 [==============================] - 10s 21ms/step - reward: -0.3183 Interval 2315 (1157000 steps performed) 500/500 [==============================] - 11s 21ms/step - reward: -0.1059 Interval 2316 (1157500 steps performed) 500/500 [==============================] - 11s 22ms/step - reward: -0.1791 Interval 2317 (1158000 steps performed) 500/500 [==============================] - 11s 22ms/step - reward: -0.2044 Interval 2318 (1158500 steps performed) 500/500 [==============================] - 11s 22ms/step - reward: -0.1553 Interval 2319 (1159000 steps performed) 500/500 [==============================] - 12s 24ms/step - reward: -0.2565 Interval 2320 (1159500 steps performed) 500/500 [==============================] - 12s 23ms/step - reward: -0.0610 Interval 2321 (1160000 steps performed) 500/500 [==============================] - 13s 25ms/step - reward: -0.1969 Interval 2322 (1160500 steps performed) 500/500 [==============================] - 12s 24ms/step - reward: -0.2397 Interval 2323 (1161000 steps performed) 500/500 
[==============================] - 12s 24ms/step - reward: -0.0281 Interval 2324 (1161500 steps performed) 500/500 [==============================] - 12s 25ms/step - reward: -0.3172 Interval 2325 (1162000 steps performed) 500/500 [==============================] - 13s 25ms/step - reward: -0.2467 Interval 2326 (1162500 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -2.3520 1 episodes - episode_reward: -4760.044 [-4760.044, -4760.044] - loss: 27.889 - mae: 143.256 - mean_q: 186.429 Interval 2327 (1163000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3164 Interval 2328 (1163500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1206 Interval 2329 (1164000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1265 Interval 2330 (1164500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8444 1 episodes - episode_reward: -1129.696 [-1129.696, -1129.696] - loss: 25.832 - mae: 130.481 - mean_q: 167.728 Interval 2331 (1165000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0604 3 episodes - episode_reward: -191.710 [-302.533, -105.577] - loss: 35.205 - mae: 126.762 - mean_q: 161.778 Interval 2332 (1165500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2704 4 episodes - episode_reward: -169.523 [-335.232, -100.000] - loss: 26.493 - mae: 123.741 - mean_q: 156.733 Interval 2333 (1166000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5108 3 episodes - episode_reward: -206.459 [-355.317, -119.129] - loss: 31.631 - mae: 120.965 - mean_q: 153.426 Interval 2334 (1166500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5908 5 episodes - episode_reward: -282.617 [-379.823, -100.000] - loss: 26.978 - mae: 119.555 - mean_q: 148.318 Interval 2335 (1167000 steps 
performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4610 1 episodes - episode_reward: 218.985 [218.985, 218.985] - loss: 35.119 - mae: 116.803 - mean_q: 145.513 Interval 2336 (1167500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1246 Interval 2337 (1168000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2015 Interval 2338 (1168500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3707 Interval 2339 (1169000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9098 2 episodes - episode_reward: -357.570 [-620.086, -95.054] - loss: 27.473 - mae: 105.045 - mean_q: 128.517 Interval 2340 (1169500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0297 1 episodes - episode_reward: -520.082 [-520.082, -520.082] - loss: 28.716 - mae: 102.536 - mean_q: 125.010 Interval 2341 (1170000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1563 Interval 2342 (1170500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2312 Interval 2343 (1171000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.2376 1 episodes - episode_reward: -1334.236 [-1334.236, -1334.236] - loss: 25.593 - mae: 97.800 - mean_q: 117.985 Interval 2344 (1171500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2089 Interval 2345 (1172000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0447 Interval 2346 (1172500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8732 2 episodes - episode_reward: -558.237 [-873.361, -243.114] - loss: 26.638 - mae: 92.628 - mean_q: 107.666 Interval 2347 (1173000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0840 1 episodes - episode_reward: 
-538.422 [-538.422, -538.422] - loss: 25.673 - mae: 91.334 - mean_q: 107.603 Interval 2348 (1173500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2027 Interval 2349 (1174000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5581 2 episodes - episode_reward: -500.325 [-612.604, -388.046] - loss: 29.713 - mae: 90.025 - mean_q: 107.701 Interval 2350 (1174500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0802 2 episodes - episode_reward: -464.339 [-618.311, -310.366] - loss: 24.865 - mae: 89.902 - mean_q: 107.038 Interval 2351 (1175000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1943 1 episodes - episode_reward: -529.979 [-529.979, -529.979] - loss: 26.698 - mae: 90.481 - mean_q: 107.325 Interval 2352 (1175500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0168 Interval 2353 (1176000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1885 Interval 2354 (1176500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1705 Interval 2355 (1177000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1283 Interval 2356 (1177500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1662 Interval 2357 (1178000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2210 Interval 2358 (1178500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1552 Interval 2359 (1179000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1427 Interval 2360 (1179500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.1417 Interval 2361 (1180000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4682 1 episodes - 
episode_reward: -474.167 [-474.167, -474.167] - loss: 84.750 - mae: 80.898 - mean_q: 95.977 Interval 2362 (1180500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0688 1 episodes - episode_reward: 223.982 [223.982, 223.982] - loss: 75.654 - mae: 79.571 - mean_q: 94.776 Interval 2363 (1181000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6643 3 episodes - episode_reward: -129.638 [-245.655, -28.599] - loss: 82.864 - mae: 78.270 - mean_q: 93.290 Interval 2364 (1181500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5416 1 episodes - episode_reward: -105.764 [-105.764, -105.764] - loss: 46.825 - mae: 77.563 - mean_q: 92.249 Interval 2365 (1182000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5377 Interval 2366 (1182500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8069 2 episodes - episode_reward: -736.798 [-1379.564, -94.033] - loss: 38.718 - mae: 73.863 - mean_q: 86.503 Interval 2367 (1183000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0076 1 episodes - episode_reward: -413.001 [-413.001, -413.001] - loss: 52.362 - mae: 73.232 - mean_q: 85.059 Interval 2368 (1183500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2667 1 episodes - episode_reward: -47.078 [-47.078, -47.078] - loss: 58.298 - mae: 72.806 - mean_q: 84.657 Interval 2369 (1184000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4493 1 episodes - episode_reward: -262.577 [-262.577, -262.577] - loss: 74.707 - mae: 72.604 - mean_q: 85.255 Interval 2370 (1184500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7544 1 episodes - episode_reward: -90.636 [-90.636, -90.636] - loss: 63.059 - mae: 72.496 - mean_q: 85.512 Interval 2371 (1185000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -4.4372 3 episodes - episode_reward: -828.588 [-2225.044, -123.093] - loss: 46.047 - mae: 71.334 - mean_q: 82.673 Interval 2372 (1185500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0116 Interval 2373 (1186000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0237 Interval 2374 (1186500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1814 3 episodes - episode_reward: -33.800 [-110.695, 87.791] - loss: 51.044 - mae: 70.158 - mean_q: 81.195 Interval 2375 (1187000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8391 1 episodes - episode_reward: -395.526 [-395.526, -395.526] - loss: 41.349 - mae: 68.238 - mean_q: 78.011 Interval 2376 (1187500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0486 2 episodes - episode_reward: -254.521 [-325.573, -183.468] - loss: 48.802 - mae: 67.661 - mean_q: 77.366 Interval 2377 (1188000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8258 6 episodes - episode_reward: -246.609 [-510.571, -103.038] - loss: 45.408 - mae: 68.049 - mean_q: 77.263 Interval 2378 (1188500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7573 2 episodes - episode_reward: -163.208 [-249.164, -77.252] - loss: 40.149 - mae: 67.675 - mean_q: 75.436 Interval 2379 (1189000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1426 Interval 2380 (1189500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.3178 1 episodes - episode_reward: 148.402 [148.402, 148.402] - loss: 36.178 - mae: 67.771 - mean_q: 75.700 Interval 2381 (1190000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7940 2 episodes - episode_reward: -190.928 [-190.990, -190.867] - loss: 47.236 - 
mae: 67.305 - mean_q: 75.397 Interval 2382 (1190500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0702 Interval 2383 (1191000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0407 Interval 2384 (1191500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3854 Interval 2385 (1192000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0426 1 episodes - episode_reward: -127.448 [-127.448, -127.448] - loss: 30.753 - mae: 67.636 - mean_q: 75.944 Interval 2386 (1192500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0802 Interval 2387 (1193000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2447 1 episodes - episode_reward: 147.237 [147.237, 147.237] - loss: 33.869 - mae: 66.859 - mean_q: 74.309 Interval 2388 (1193500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7347 1 episodes - episode_reward: -333.550 [-333.550, -333.550] - loss: 41.288 - mae: 67.189 - mean_q: 72.714 Interval 2389 (1194000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8818 1 episodes - episode_reward: -402.102 [-402.102, -402.102] - loss: 36.500 - mae: 66.068 - mean_q: 71.907 Interval 2390 (1194500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0314 Interval 2391 (1195000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3834 3 episodes - episode_reward: -416.342 [-548.688, -341.292] - loss: 37.742 - mae: 65.444 - mean_q: 71.489 Interval 2392 (1195500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5318 3 episodes - episode_reward: -423.701 [-453.745, -369.590] - loss: 32.884 - mae: 65.827 - mean_q: 70.065 Interval 2393 (1196000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 
-0.0584 Interval 2394 (1196500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2830 1 episodes - episode_reward: 82.074 [82.074, 82.074] - loss: 41.713 - mae: 64.976 - mean_q: 69.378 Interval 2395 (1197000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5237 2 episodes - episode_reward: -149.408 [-200.194, -98.622] - loss: 31.087 - mae: 64.810 - mean_q: 69.017 Interval 2396 (1197500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8108 4 episodes - episode_reward: -90.802 [-212.904, 0.883] - loss: 34.443 - mae: 64.461 - mean_q: 67.760 Interval 2397 (1198000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0637 Interval 2398 (1198500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1695 1 episodes - episode_reward: -99.306 [-99.306, -99.306] - loss: 27.239 - mae: 63.148 - mean_q: 65.863 Interval 2399 (1199000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4587 1 episodes - episode_reward: -236.709 [-236.709, -236.709] - loss: 34.871 - mae: 62.765 - mean_q: 65.562 Interval 2400 (1199500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1293 Interval 2401 (1200000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2911 1 episodes - episode_reward: -219.434 [-219.434, -219.434] - loss: 31.730 - mae: 63.218 - mean_q: 65.989 Interval 2402 (1200500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9208 1 episodes - episode_reward: -1409.869 [-1409.869, -1409.869] - loss: 33.776 - mae: 63.788 - mean_q: 66.689 Interval 2403 (1201000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3241 2 episodes - episode_reward: -95.004 [-108.159, -81.850] - loss: 26.111 - mae: 63.867 - mean_q: 64.676 Interval 2404 (1201500 steps 
performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0898 Interval 2405 (1202000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1315 Interval 2406 (1202500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9645 3 episodes - episode_reward: -204.237 [-333.917, -100.000] - loss: 29.825 - mae: 64.591 - mean_q: 64.462 Interval 2407 (1203000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1205 1 episodes - episode_reward: 86.283 [86.283, 86.283] - loss: 30.242 - mae: 63.433 - mean_q: 63.419 Interval 2408 (1203500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0116 Interval 2409 (1204000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8986 3 episodes - episode_reward: -152.615 [-247.859, -103.494] - loss: 31.187 - mae: 62.397 - mean_q: 60.858 Interval 2410 (1204500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5693 1 episodes - episode_reward: -328.499 [-328.499, -328.499] - loss: 30.441 - mae: 61.645 - mean_q: 58.909 Interval 2411 (1205000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2889 2 episodes - episode_reward: -83.779 [-332.741, 165.182] - loss: 30.415 - mae: 61.388 - mean_q: 57.952 Interval 2412 (1205500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1778 Interval 2413 (1206000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1425 Interval 2414 (1206500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8289 3 episodes - episode_reward: -319.896 [-634.714, -108.168] - loss: 31.214 - mae: 60.800 - mean_q: 55.321 Interval 2415 (1207000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7766 1 episodes - episode_reward: -941.220 [-941.220, 
-941.220] - loss: 30.952 - mae: 59.631 - mean_q: 52.760 Interval 2416 (1207500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0409 1 episodes - episode_reward: -26.564 [-26.564, -26.564] - loss: 35.435 - mae: 59.305 - mean_q: 51.644 Interval 2417 (1208000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4009 4 episodes - episode_reward: -169.250 [-336.751, -19.032] - loss: 32.974 - mae: 59.088 - mean_q: 51.352 Interval 2418 (1208500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6199 3 episodes - episode_reward: -250.820 [-315.646, -172.801] - loss: 32.620 - mae: 58.061 - mean_q: 49.161 Interval 2419 (1209000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6784 3 episodes - episode_reward: -306.598 [-402.237, -123.063] - loss: 33.117 - mae: 57.691 - mean_q: 48.249 Interval 2420 (1209500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2171 1 episodes - episode_reward: -101.828 [-101.828, -101.828] - loss: 30.447 - mae: 56.598 - mean_q: 45.968 Interval 2421 (1210000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6844 2 episodes - episode_reward: -145.628 [-218.706, -72.550] - loss: 28.868 - mae: 55.211 - mean_q: 44.249 Interval 2422 (1210500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3117 1 episodes - episode_reward: -134.808 [-134.808, -134.808] - loss: 31.024 - mae: 54.519 - mean_q: 43.231 Interval 2423 (1211000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4810 2 episodes - episode_reward: -161.656 [-197.357, -125.954] - loss: 30.489 - mae: 53.966 - mean_q: 42.074 Interval 2424 (1211500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3505 4 episodes - episode_reward: -155.405 [-265.474, -82.471] - loss: 31.149 - mae: 53.453 - 
mean_q: 41.506 Interval 2425 (1212000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5745 1 episodes - episode_reward: -2283.569 [-2283.569, -2283.569] - loss: 33.728 - mae: 53.394 - mean_q: 39.109 Interval 2426 (1212500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2726 3 episodes - episode_reward: -207.769 [-326.640, -77.844] - loss: 34.387 - mae: 52.506 - mean_q: 36.388 Interval 2427 (1213000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0201 Interval 2428 (1213500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4126 Interval 2429 (1214000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.9884 1 episodes - episode_reward: -2253.845 [-2253.845, -2253.845] - loss: 31.202 - mae: 51.774 - mean_q: 34.754 Interval 2430 (1214500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7169 Interval 2431 (1215000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0756 1 episodes - episode_reward: -391.690 [-391.690, -391.690] - loss: 30.736 - mae: 51.626 - mean_q: 34.306 Interval 2432 (1215500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3224 2 episodes - episode_reward: -513.592 [-928.854, -98.331] - loss: 34.041 - mae: 51.734 - mean_q: 35.550 Interval 2433 (1216000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0353 Interval 2434 (1216500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2015 2 episodes - episode_reward: -127.116 [-154.232, -100.000] - loss: 29.364 - mae: 51.671 - mean_q: 35.917 Interval 2435 (1217000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3156 Interval 2436 (1217500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 
-0.5511 1 episodes - episode_reward: 140.397 [140.397, 140.397] - loss: 31.953 - mae: 51.803 - mean_q: 36.707 Interval 2437 (1218000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5063 4 episodes - episode_reward: -339.105 [-592.638, -113.075] - loss: 32.491 - mae: 51.306 - mean_q: 36.447 Interval 2438 (1218500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3975 2 episodes - episode_reward: -5.937 [-16.588, 4.713] - loss: 31.274 - mae: 51.126 - mean_q: 35.431 Interval 2439 (1219000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1306 2 episodes - episode_reward: -315.908 [-477.880, -153.936] - loss: 29.537 - mae: 51.513 - mean_q: 34.577 Interval 2440 (1219500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2716 1 episodes - episode_reward: -693.665 [-693.665, -693.665] - loss: 29.131 - mae: 50.896 - mean_q: 34.503 Interval 2441 (1220000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1859 Interval 2442 (1220500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0118 Interval 2443 (1221000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0795 Interval 2444 (1221500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2257 1 episodes - episode_reward: 33.749 [33.749, 33.749] - loss: 27.341 - mae: 50.918 - mean_q: 35.923 Interval 2445 (1222000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2701 1 episodes - episode_reward: -143.355 [-143.355, -143.355] - loss: 29.468 - mae: 51.744 - mean_q: 36.194 Interval 2446 (1222500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0091 Interval 2447 (1223000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1777 4 episodes - episode_reward: 
-148.624 [-272.238, -10.111] - loss: 32.589 - mae: 52.413 - mean_q: 36.754 Interval 2448 (1223500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.1220 1 episodes - episode_reward: 28.890 [28.890, 28.890] - loss: 32.452 - mae: 52.015 - mean_q: 36.869 Interval 2449 (1224000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2535 Interval 2450 (1224500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0888 1 episodes - episode_reward: -165.709 [-165.709, -165.709] - loss: 27.460 - mae: 51.526 - mean_q: 38.531 Interval 2451 (1225000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7515 1 episodes - episode_reward: -329.777 [-329.777, -329.777] - loss: 30.625 - mae: 51.709 - mean_q: 38.946 Interval 2452 (1225500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2491 1 episodes - episode_reward: -108.395 [-108.395, -108.395] - loss: 30.359 - mae: 51.134 - mean_q: 37.037 Interval 2453 (1226000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3170 1 episodes - episode_reward: -147.730 [-147.730, -147.730] - loss: 28.340 - mae: 51.076 - mean_q: 37.408 Interval 2454 (1226500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3085 1 episodes - episode_reward: 128.803 [128.803, 128.803] - loss: 27.737 - mae: 51.663 - mean_q: 37.388 Interval 2455 (1227000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0917 1 episodes - episode_reward: -19.778 [-19.778, -19.778] - loss: 29.965 - mae: 51.704 - mean_q: 38.422 Interval 2456 (1227500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5158 1 episodes - episode_reward: -133.162 [-133.162, -133.162] - loss: 28.376 - mae: 51.602 - mean_q: 37.353 Interval 2457 (1228000 steps performed) 500/500 [==============================] - 
2s 5ms/step - reward: 0.1349 Interval 2458 (1228500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1637 Interval 2459 (1229000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1651 1 episodes - episode_reward: -209.833 [-209.833, -209.833] - loss: 26.413 - mae: 52.114 - mean_q: 37.823 Interval 2460 (1229500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5116 2 episodes - episode_reward: -121.451 [-187.777, -55.125] - loss: 29.969 - mae: 51.955 - mean_q: 37.055 Interval 2461 (1230000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2438 1 episodes - episode_reward: -92.061 [-92.061, -92.061] - loss: 26.429 - mae: 51.132 - mean_q: 38.421 Interval 2462 (1230500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.1114 1 episodes - episode_reward: -56.843 [-56.843, -56.843] - loss: 31.602 - mae: 51.412 - mean_q: 38.521 Interval 2463 (1231000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2938 1 episodes - episode_reward: -65.476 [-65.476, -65.476] - loss: 28.850 - mae: 51.117 - mean_q: 38.940 Interval 2464 (1231500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2573 1 episodes - episode_reward: -69.817 [-69.817, -69.817] - loss: 27.784 - mae: 51.346 - mean_q: 39.632 Interval 2465 (1232000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0627 Interval 2466 (1232500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2511 1 episodes - episode_reward: -169.932 [-169.932, -169.932] - loss: 29.199 - mae: 50.607 - mean_q: 40.196 Interval 2467 (1233000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5797 3 episodes - episode_reward: -76.377 [-120.166, -1.092] - loss: 25.615 - mae: 50.155 - mean_q: 39.072 Interval 2468 
(1233500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3371 2 episodes - episode_reward: -87.295 [-109.560, -65.029] - loss: 26.136 - mae: 49.390 - mean_q: 39.146 Interval 2469 (1234000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0659 1 episodes - episode_reward: -19.825 [-19.825, -19.825] - loss: 26.742 - mae: 48.723 - mean_q: 37.320 Interval 2470 (1234500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1379 1 episodes - episode_reward: -8.757 [-8.757, -8.757] - loss: 28.140 - mae: 49.352 - mean_q: 37.247 Interval 2471 (1235000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1155 1 episodes - episode_reward: -110.395 [-110.395, -110.395] - loss: 28.533 - mae: 48.604 - mean_q: 39.535 Interval 2472 (1235500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2688 6 episodes - episode_reward: -98.351 [-238.777, -37.842] - loss: 27.511 - mae: 48.144 - mean_q: 38.788 Interval 2473 (1236000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.4745 1 episodes - episode_reward: 222.063 [222.063, 222.063] - loss: 26.619 - mae: 47.822 - mean_q: 38.297 Interval 2474 (1236500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8795 3 episodes - episode_reward: -148.019 [-297.983, 31.993] - loss: 23.491 - mae: 47.221 - mean_q: 37.130 Interval 2475 (1237000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1477 2 episodes - episode_reward: -47.087 [-72.865, -21.308] - loss: 23.778 - mae: 46.565 - mean_q: 36.194 Interval 2476 (1237500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0592 Interval 2477 (1238000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6398 2 episodes - episode_reward: -159.812 [-235.282, -84.341] - loss: 
23.775 - mae: 45.731 - mean_q: 34.756 Interval 2478 (1238500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 0.0792 2 episodes - episode_reward: -50.993 [-59.631, -42.355] - loss: 27.164 - mae: 45.380 - mean_q: 36.742 Interval 2479 (1239000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1944 Interval 2480 (1239500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5033 1 episodes - episode_reward: 213.308 [213.308, 213.308] - loss: 26.250 - mae: 44.777 - mean_q: 33.879 Interval 2481 (1240000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1573 Interval 2482 (1240500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0701 1 episodes - episode_reward: -90.436 [-90.436, -90.436] - loss: 25.874 - mae: 42.662 - mean_q: 31.710 Interval 2483 (1241000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5181 2 episodes - episode_reward: -76.309 [-117.371, -35.248] - loss: 25.724 - mae: 43.041 - mean_q: 32.070 Interval 2484 (1241500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5463 2 episodes - episode_reward: -178.014 [-208.130, -147.898] - loss: 30.614 - mae: 42.715 - mean_q: 31.314 Interval 2485 (1242000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6068 3 episodes - episode_reward: -77.740 [-100.000, -60.482] - loss: 23.712 - mae: 42.484 - mean_q: 31.333 Interval 2486 (1242500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4908 1 episodes - episode_reward: -267.532 [-267.532, -267.532] - loss: 23.973 - mae: 42.198 - mean_q: 31.891 Interval 2487 (1243000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0789 Interval 2488 (1243500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0277 
1 episodes - episode_reward: -38.466 [-38.466, -38.466] - loss: 22.116 - mae: 41.899 - mean_q: 28.118 Interval 2489 (1244000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2548 2 episodes - episode_reward: -36.847 [-60.323, -13.371] - loss: 20.221 - mae: 41.648 - mean_q: 27.877 Interval 2490 (1244500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1655 2 episodes - episode_reward: -79.044 [-94.319, -63.769] - loss: 20.921 - mae: 40.974 - mean_q: 27.587 Interval 2491 (1245000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1043 Interval 2492 (1245500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1860 1 episodes - episode_reward: 137.768 [137.768, 137.768] - loss: 24.517 - mae: 38.841 - mean_q: 27.952 Interval 2493 (1246000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4380 2 episodes - episode_reward: -105.662 [-168.825, -42.499] - loss: 21.030 - mae: 39.030 - mean_q: 27.141 Interval 2494 (1246500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8682 2 episodes - episode_reward: -192.507 [-228.303, -156.711] - loss: 24.814 - mae: 39.300 - mean_q: 26.074 Interval 2495 (1247000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6610 3 episodes - episode_reward: -297.799 [-489.282, -89.212] - loss: 20.359 - mae: 38.885 - mean_q: 25.933 Interval 2496 (1247500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1258 3 episodes - episode_reward: -192.342 [-240.539, -161.038] - loss: 22.242 - mae: 38.988 - mean_q: 26.207 Interval 2497 (1248000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0369 1 episodes - episode_reward: -123.357 [-123.357, -123.357] - loss: 21.718 - mae: 38.510 - mean_q: 25.488 Interval 2498 (1248500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -0.3684 3 episodes - episode_reward: 8.744 [-200.962, 277.621] - loss: 21.812 - mae: 38.833 - mean_q: 25.349 Interval 2499 (1249000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5553 3 episodes - episode_reward: -126.307 [-164.292, -96.372] - loss: 22.161 - mae: 38.705 - mean_q: 25.754 Interval 2500 (1249500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2881 2 episodes - episode_reward: -63.010 [-72.785, -53.236] - loss: 24.851 - mae: 38.354 - mean_q: 24.722 Interval 2501 (1250000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9505 2 episodes - episode_reward: -222.521 [-469.440, 24.398] - loss: 24.891 - mae: 38.433 - mean_q: 24.190 Interval 2502 (1250500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1223 4 episodes - episode_reward: -147.477 [-305.552, -81.313] - loss: 20.789 - mae: 37.791 - mean_q: 24.226 Interval 2503 (1251000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6258 2 episodes - episode_reward: -146.655 [-222.105, -71.204] - loss: 19.568 - mae: 37.576 - mean_q: 24.083 Interval 2504 (1251500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6991 3 episodes - episode_reward: -122.860 [-197.147, -73.163] - loss: 20.518 - mae: 37.725 - mean_q: 23.922 Interval 2505 (1252000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5582 2 episodes - episode_reward: -99.683 [-134.968, -64.398] - loss: 19.046 - mae: 37.762 - mean_q: 24.005 Interval 2506 (1252500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6514 3 episodes - episode_reward: -119.728 [-185.485, -40.521] - loss: 24.926 - mae: 37.589 - mean_q: 24.042 Interval 2507 (1253000 steps performed) 500/500 [==============================] - 2s 4ms/step - 
reward: -1.1158 3 episodes - episode_reward: -172.061 [-258.257, -81.143] - loss: 21.262 - mae: 37.236 - mean_q: 22.375 Interval 2508 (1253500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1547 2 episodes - episode_reward: -97.508 [-114.291, -80.724] - loss: 23.526 - mae: 37.527 - mean_q: 21.895 Interval 2509 (1254000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5689 2 episodes - episode_reward: -119.779 [-140.339, -99.220] - loss: 26.265 - mae: 37.293 - mean_q: 22.241 Interval 2510 (1254500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9045 4 episodes - episode_reward: -110.348 [-131.292, -48.325] - loss: 23.975 - mae: 37.156 - mean_q: 21.499 Interval 2511 (1255000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5120 1 episodes - episode_reward: -202.447 [-202.447, -202.447] - loss: 23.637 - mae: 36.394 - mean_q: 20.093 Interval 2512 (1255500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6756 3 episodes - episode_reward: -132.874 [-157.394, -102.347] - loss: 22.209 - mae: 36.483 - mean_q: 20.533 Interval 2513 (1256000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2986 3 episodes - episode_reward: -103.750 [-130.328, -80.922] - loss: 22.720 - mae: 36.053 - mean_q: 19.141 Interval 2514 (1256500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5930 3 episodes - episode_reward: -377.247 [-583.677, -57.957] - loss: 24.099 - mae: 35.401 - mean_q: 18.012 Interval 2515 (1257000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7844 2 episodes - episode_reward: -165.491 [-271.339, -59.642] - loss: 19.157 - mae: 34.612 - mean_q: 17.749 Interval 2516 (1257500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5426 1 episodes - episode_reward: 
-242.867 [-242.867, -242.867] - loss: 20.812 - mae: 34.526 - mean_q: 16.515 Interval 2517 (1258000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3595 1 episodes - episode_reward: -198.076 [-198.076, -198.076] - loss: 20.496 - mae: 34.443 - mean_q: 15.933 Interval 2518 (1258500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6655 3 episodes - episode_reward: -140.123 [-233.143, -82.964] - loss: 22.015 - mae: 34.335 - mean_q: 16.388 Interval 2519 (1259000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9787 7 episodes - episode_reward: -83.711 [-100.000, -41.034] - loss: 22.206 - mae: 34.449 - mean_q: 15.948 Interval 2520 (1259500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3537 1 episodes - episode_reward: -85.325 [-85.325, -85.325] - loss: 23.292 - mae: 34.095 - mean_q: 16.813 Interval 2521 (1260000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6382 2 episodes - episode_reward: -376.580 [-660.875, -92.285] - loss: 22.779 - mae: 34.173 - mean_q: 16.057 Interval 2522 (1260500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2231 4 episodes - episode_reward: -165.616 [-252.395, -120.279] - loss: 20.361 - mae: 33.757 - mean_q: 16.076 Interval 2523 (1261000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4634 5 episodes - episode_reward: -113.894 [-218.811, -8.167] - loss: 21.198 - mae: 34.424 - mean_q: 15.706 Interval 2524 (1261500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6841 3 episodes - episode_reward: -331.396 [-627.654, -135.970] - loss: 21.567 - mae: 33.557 - mean_q: 15.779 Interval 2525 (1262000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2889 1 episodes - episode_reward: -183.084 [-183.084, -183.084] - loss: 20.090 - 
mae: 33.240 - mean_q: 15.509 Interval 2526 (1262500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3321 4 episodes - episode_reward: -32.300 [-159.023, 217.660] - loss: 21.624 - mae: 33.658 - mean_q: 15.279 Interval 2527 (1263000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3778 2 episodes - episode_reward: -133.513 [-168.876, -98.149] - loss: 22.878 - mae: 33.037 - mean_q: 14.937 Interval 2528 (1263500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6662 3 episodes - episode_reward: -85.524 [-263.770, 196.249] - loss: 23.109 - mae: 32.885 - mean_q: 14.461 Interval 2529 (1264000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7489 3 episodes - episode_reward: -124.361 [-163.197, -88.292] - loss: 20.630 - mae: 32.855 - mean_q: 14.725 Interval 2530 (1264500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1426 Interval 2531 (1265000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1155 3 episodes - episode_reward: -374.655 [-573.067, -217.591] - loss: 19.122 - mae: 32.459 - mean_q: 13.656 Interval 2532 (1265500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3928 2 episodes - episode_reward: -91.037 [-148.498, -33.577] - loss: 21.345 - mae: 32.538 - mean_q: 14.316 Interval 2533 (1266000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1812 Interval 2534 (1266500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -1.3443 Interval 2535 (1267000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3429 1 episodes - episode_reward: -1006.623 [-1006.623, -1006.623] - loss: 21.397 - mae: 31.667 - mean_q: 12.849 Interval 2536 (1267500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
-0.3726 Interval 2537 (1268000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0087 Interval 2538 (1268500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0777 Interval 2539 (1269000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9266 1 episodes - episode_reward: -965.189 [-965.189, -965.189] - loss: 21.273 - mae: 30.983 - mean_q: 13.447 Interval 2540 (1269500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0317 1 episodes - episode_reward: -561.856 [-561.856, -561.856] - loss: 20.512 - mae: 31.585 - mean_q: 15.146 Interval 2541 (1270000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4603 3 episodes - episode_reward: -410.399 [-608.634, -92.091] - loss: 20.523 - mae: 31.462 - mean_q: 15.208 Interval 2542 (1270500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4705 4 episodes - episode_reward: -321.526 [-564.478, -91.053] - loss: 22.573 - mae: 32.905 - mean_q: 15.077 Interval 2543 (1271000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3338 1 episodes - episode_reward: -666.983 [-666.983, -666.983] - loss: 21.393 - mae: 33.497 - mean_q: 15.696 Interval 2544 (1271500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5355 1 episodes - episode_reward: -759.122 [-759.122, -759.122] - loss: 23.983 - mae: 34.140 - mean_q: 16.113 Interval 2545 (1272000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3502 3 episodes - episode_reward: -562.843 [-694.181, -318.747] - loss: 23.010 - mae: 34.779 - mean_q: 16.132 Interval 2546 (1272500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1452 3 episodes - episode_reward: -326.116 [-644.917, -100.000] - loss: 22.249 - mae: 35.198 - mean_q: 16.448 Interval 2547 (1273000 
steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9008 5 episodes - episode_reward: -475.107 [-714.822, -145.118] - loss: 22.686 - mae: 35.740 - mean_q: 15.894 Interval 2548 (1273500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0300 3 episodes - episode_reward: -330.645 [-423.008, -180.664] - loss: 23.631 - mae: 35.557 - mean_q: 15.258 Interval 2549 (1274000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5538 5 episodes - episode_reward: -378.347 [-1108.514, -124.571] - loss: 28.591 - mae: 36.325 - mean_q: 14.753 Interval 2550 (1274500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1576 2 episodes - episode_reward: -806.046 [-932.997, -679.095] - loss: 26.829 - mae: 36.558 - mean_q: 14.656 Interval 2551 (1275000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4009 6 episodes - episode_reward: -346.565 [-704.934, -106.126] - loss: 26.329 - mae: 37.054 - mean_q: 15.487 Interval 2552 (1275500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2520 4 episodes - episode_reward: -571.265 [-920.665, -247.220] - loss: 25.316 - mae: 37.780 - mean_q: 15.725 Interval 2553 (1276000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8590 1 episodes - episode_reward: -1585.047 [-1585.047, -1585.047] - loss: 23.345 - mae: 39.018 - mean_q: 15.962 Interval 2554 (1276500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6801 2 episodes - episode_reward: -1060.571 [-1064.706, -1056.435] - loss: 26.289 - mae: 39.738 - mean_q: 15.696 Interval 2555 (1277000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7403 2 episodes - episode_reward: -487.684 [-628.178, -347.189] - loss: 28.469 - mae: 41.454 - mean_q: 15.033 Interval 2556 (1277500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.8424 2 episodes - episode_reward: -393.116 [-501.050, -285.182] - loss: 26.640 - mae: 42.499 - mean_q: 16.493 Interval 2557 (1278000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1394 3 episodes - episode_reward: -581.413 [-693.445, -488.806] - loss: 32.783 - mae: 43.512 - mean_q: 16.425 Interval 2558 (1278500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4609 1 episodes - episode_reward: -648.120 [-648.120, -648.120] - loss: 31.526 - mae: 45.010 - mean_q: 16.914 Interval 2559 (1279000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2664 2 episodes - episode_reward: -328.003 [-500.360, -155.647] - loss: 31.966 - mae: 46.821 - mean_q: 18.786 Interval 2560 (1279500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3970 1 episodes - episode_reward: -1280.662 [-1280.662, -1280.662] - loss: 34.282 - mae: 48.158 - mean_q: 19.668 Interval 2561 (1280000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6080 2 episodes - episode_reward: -588.726 [-917.927, -259.525] - loss: 34.536 - mae: 50.421 - mean_q: 21.486 Interval 2562 (1280500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7745 3 episodes - episode_reward: -295.382 [-571.916, -110.244] - loss: 35.097 - mae: 52.214 - mean_q: 22.270 Interval 2563 (1281000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4598 1 episodes - episode_reward: -1328.003 [-1328.003, -1328.003] - loss: 45.500 - mae: 53.443 - mean_q: 24.426 Interval 2564 (1281500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7946 Interval 2565 (1282000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6358 2 episodes - episode_reward: -820.235 [-1014.094, -626.376] - loss: 
40.023 - mae: 56.342 - mean_q: 25.532 Interval 2566 (1282500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5041 2 episodes - episode_reward: -378.263 [-656.526, -100.000] - loss: 40.321 - mae: 57.303 - mean_q: 26.328 Interval 2567 (1283000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1059 Interval 2568 (1283500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7248 1 episodes - episode_reward: -1278.982 [-1278.982, -1278.982] - loss: 43.580 - mae: 59.442 - mean_q: 30.757 Interval 2569 (1284000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8298 1 episodes - episode_reward: -709.628 [-709.628, -709.628] - loss: 45.311 - mae: 61.008 - mean_q: 30.736 Interval 2570 (1284500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0848 3 episodes - episode_reward: -323.779 [-558.120, -101.710] - loss: 37.127 - mae: 62.326 - mean_q: 31.719 Interval 2571 (1285000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6312 1 episodes - episode_reward: -643.067 [-643.067, -643.067] - loss: 43.507 - mae: 63.464 - mean_q: 32.111 Interval 2572 (1285500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5935 1 episodes - episode_reward: -395.904 [-395.904, -395.904] - loss: 47.665 - mae: 65.174 - mean_q: 32.962 Interval 2573 (1286000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4264 Interval 2574 (1286500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1847 1 episodes - episode_reward: -765.807 [-765.807, -765.807] - loss: 44.248 - mae: 66.061 - mean_q: 35.782 Interval 2575 (1287000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2389 2 episodes - episode_reward: -323.226 [-499.834, -146.619] - loss: 45.857 - mae: 66.869 - 
mean_q: 34.138 Interval 2576 (1287500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7816 Interval 2577 (1288000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8594 2 episodes - episode_reward: -656.813 [-1081.411, -232.215] - loss: 41.475 - mae: 68.152 - mean_q: 34.235 Interval 2578 (1288500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2906 1 episodes - episode_reward: -639.723 [-639.723, -639.723] - loss: 45.365 - mae: 68.750 - mean_q: 34.838 Interval 2579 (1289000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1799 1 episodes - episode_reward: -537.112 [-537.112, -537.112] - loss: 45.391 - mae: 69.879 - mean_q: 34.703 Interval 2580 (1289500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1187 2 episodes - episode_reward: -294.452 [-454.342, -134.561] - loss: 47.287 - mae: 70.804 - mean_q: 34.812 Interval 2581 (1290000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8257 2 episodes - episode_reward: -453.341 [-616.648, -290.034] - loss: 46.687 - mae: 71.558 - mean_q: 34.432 Interval 2582 (1290500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3391 1 episodes - episode_reward: -150.247 [-150.247, -150.247] - loss: 51.633 - mae: 73.216 - mean_q: 36.556 Interval 2583 (1291000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5331 5 episodes - episode_reward: -268.118 [-532.564, -100.000] - loss: 50.080 - mae: 74.126 - mean_q: 35.602 Interval 2584 (1291500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1506 4 episodes - episode_reward: -107.035 [-135.344, -88.732] - loss: 56.923 - mae: 76.874 - mean_q: 36.434 Interval 2585 (1292000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8712 1 episodes - 
episode_reward: -229.961 [-229.961, -229.961] - loss: 54.269 - mae: 77.849 - mean_q: 40.008 Interval 2586 (1292500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6225 2 episodes - episode_reward: -305.979 [-463.093, -148.865] - loss: 54.362 - mae: 78.827 - mean_q: 41.273 Interval 2587 (1293000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2801 1 episodes - episode_reward: -88.141 [-88.141, -88.141] - loss: 56.851 - mae: 79.882 - mean_q: 43.302 Interval 2588 (1293500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2154 1 episodes - episode_reward: -149.767 [-149.767, -149.767] - loss: 53.648 - mae: 80.468 - mean_q: 42.898 Interval 2589 (1294000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5003 1 episodes - episode_reward: -171.479 [-171.479, -171.479] - loss: 64.493 - mae: 81.290 - mean_q: 43.261 Interval 2590 (1294500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2144 3 episodes - episode_reward: -230.556 [-396.933, -100.000] - loss: 64.509 - mae: 81.502 - mean_q: 44.412 Interval 2591 (1295000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4809 1 episodes - episode_reward: -140.561 [-140.561, -140.561] - loss: 62.080 - mae: 81.573 - mean_q: 45.491 Interval 2592 (1295500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1683 7 episodes - episode_reward: -180.223 [-358.747, -9.820] - loss: 69.864 - mae: 82.606 - mean_q: 46.603 Interval 2593 (1296000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0953 Interval 2594 (1296500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1950 5 episodes - episode_reward: -219.427 [-288.169, -124.647] - loss: 72.673 - mae: 84.907 - mean_q: 51.629 Interval 2595 (1297000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.7311 4 episodes - episode_reward: -211.167 [-244.695, -167.914] - loss: 74.726 - mae: 84.839 - mean_q: 51.564 Interval 2596 (1297500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0287 3 episodes - episode_reward: -183.401 [-202.927, -157.921] - loss: 74.848 - mae: 86.397 - mean_q: 55.384 Interval 2597 (1298000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2343 3 episodes - episode_reward: -193.544 [-267.741, -128.689] - loss: 81.194 - mae: 87.541 - mean_q: 53.991 Interval 2598 (1298500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7832 1 episodes - episode_reward: -413.821 [-413.821, -413.821] - loss: 73.104 - mae: 87.304 - mean_q: 55.790 Interval 2599 (1299000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8969 2 episodes - episode_reward: -212.320 [-219.828, -204.812] - loss: 77.460 - mae: 87.140 - mean_q: 55.922 Interval 2600 (1299500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7583 1 episodes - episode_reward: -290.133 [-290.133, -290.133] - loss: 76.736 - mae: 85.587 - mean_q: 51.969 Interval 2601 (1300000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1065 Interval 2602 (1300500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2997 Interval 2603 (1301000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0083 3 episodes - episode_reward: -396.067 [-678.031, -254.992] - loss: 72.701 - mae: 87.411 - mean_q: 58.523 Interval 2604 (1301500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8417 1 episodes - episode_reward: -155.971 [-155.971, -155.971] - loss: 69.501 - mae: 87.670 - mean_q: 56.392 Interval 2605 (1302000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.8006 5 episodes - episode_reward: -250.885 [-561.281, -40.125] - loss: 75.541 - mae: 86.039 - mean_q: 56.819 Interval 2606 (1302500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7636 3 episodes - episode_reward: -291.407 [-387.345, -210.723] - loss: 72.478 - mae: 84.425 - mean_q: 53.792 Interval 2607 (1303000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3990 4 episodes - episode_reward: -290.994 [-459.714, -190.375] - loss: 76.656 - mae: 84.637 - mean_q: 55.742 Interval 2608 (1303500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3130 3 episodes - episode_reward: -249.984 [-311.307, -177.533] - loss: 69.854 - mae: 84.339 - mean_q: 55.848 Interval 2609 (1304000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2153 4 episodes - episode_reward: -282.742 [-393.398, -240.772] - loss: 68.825 - mae: 83.690 - mean_q: 54.127 Interval 2610 (1304500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2092 5 episodes - episode_reward: -183.361 [-370.728, -57.218] - loss: 68.357 - mae: 83.042 - mean_q: 51.716 Interval 2611 (1305000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6745 4 episodes - episode_reward: -378.998 [-506.093, -189.448] - loss: 68.797 - mae: 82.275 - mean_q: 51.540 Interval 2612 (1305500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0433 5 episodes - episode_reward: -202.163 [-261.286, -100.000] - loss: 65.926 - mae: 81.869 - mean_q: 50.435 Interval 2613 (1306000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1743 3 episodes - episode_reward: -177.638 [-217.719, -134.771] - loss: 64.885 - mae: 81.750 - mean_q: 49.835 Interval 2614 (1306500 steps performed) 500/500 [==============================] - 2s 
4ms/step - reward: -2.9298 7 episodes - episode_reward: -208.698 [-352.897, -86.626] - loss: 69.058 - mae: 81.556 - mean_q: 51.509 Interval 2615 (1307000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9753 4 episodes - episode_reward: -206.037 [-281.942, -153.670] - loss: 69.621 - mae: 82.180 - mean_q: 51.264 Interval 2616 (1307500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3290 5 episodes - episode_reward: -265.837 [-377.901, -119.121] - loss: 70.020 - mae: 82.246 - mean_q: 52.171 Interval 2617 (1308000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2347 7 episodes - episode_reward: -227.264 [-394.746, -164.397] - loss: 70.264 - mae: 82.462 - mean_q: 51.417 Interval 2618 (1308500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7509 6 episodes - episode_reward: -235.043 [-338.166, -106.559] - loss: 72.792 - mae: 83.177 - mean_q: 52.869 Interval 2619 (1309000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1630 7 episodes - episode_reward: -221.026 [-336.738, -130.071] - loss: 77.803 - mae: 84.230 - mean_q: 53.361 Interval 2620 (1309500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0420 7 episodes - episode_reward: -267.839 [-347.940, -173.882] - loss: 83.003 - mae: 86.102 - mean_q: 55.466 Interval 2621 (1310000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3837 8 episodes - episode_reward: -237.054 [-497.818, -78.925] - loss: 88.740 - mae: 89.170 - mean_q: 59.530 Interval 2622 (1310500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6136 4 episodes - episode_reward: -207.556 [-395.573, -114.554] - loss: 102.200 - mae: 95.577 - mean_q: 67.484 Interval 2623 (1311000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9639 5 episodes 
- episode_reward: -213.574 [-282.124, -119.289] - loss: 121.524 - mae: 102.071 - mean_q: 75.979 Interval 2624 (1311500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9073 5 episodes - episode_reward: -351.626 [-566.090, -218.690] - loss: 139.878 - mae: 112.189 - mean_q: 89.122 Interval 2625 (1312000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7108 5 episodes - episode_reward: -286.870 [-501.706, -163.392] - loss: 154.775 - mae: 123.902 - mean_q: 102.741 Interval 2626 (1312500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7625 4 episodes - episode_reward: -186.012 [-359.241, 23.543] - loss: 189.941 - mae: 137.426 - mean_q: 121.566 Interval 2627 (1313000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9814 10 episodes - episode_reward: -214.547 [-572.390, -95.283] - loss: 219.135 - mae: 149.728 - mean_q: 137.175 Interval 2628 (1313500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4432 7 episodes - episode_reward: -237.991 [-529.970, -95.823] - loss: 253.925 - mae: 163.101 - mean_q: 153.116 Interval 2629 (1314000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3798 6 episodes - episode_reward: -204.696 [-259.894, -125.982] - loss: 271.201 - mae: 177.491 - mean_q: 171.002 Interval 2630 (1314500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3369 4 episodes - episode_reward: -417.764 [-499.859, -253.109] - loss: 288.767 - mae: 188.553 - mean_q: 184.862 Interval 2631 (1315000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6967 5 episodes - episode_reward: -364.915 [-723.656, -111.748] - loss: 312.534 - mae: 203.651 - mean_q: 203.881 Interval 2632 (1315500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2928 3 episodes - 
episode_reward: -447.441 [-481.085, -399.773] - loss: 353.332 - mae: 215.579 - mean_q: 217.518 Interval 2633 (1316000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8975 6 episodes - episode_reward: -299.698 [-636.562, -106.436] - loss: 378.168 - mae: 229.388 - mean_q: 232.764 Interval 2634 (1316500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3411 3 episodes - episode_reward: -338.548 [-531.344, -208.203] - loss: 405.343 - mae: 241.431 - mean_q: 248.940 Interval 2635 (1317000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1129 1 episodes - episode_reward: -412.510 [-412.510, -412.510] - loss: 414.614 - mae: 247.964 - mean_q: 254.125 Interval 2636 (1317500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5952 4 episodes - episode_reward: -386.093 [-636.368, -272.681] - loss: 456.368 - mae: 264.960 - mean_q: 278.419 Interval 2637 (1318000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1392 6 episodes - episode_reward: -262.326 [-444.832, -156.267] - loss: 470.812 - mae: 271.866 - mean_q: 284.367 Interval 2638 (1318500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1262 3 episodes - episode_reward: -526.973 [-705.767, -224.444] - loss: 476.422 - mae: 282.301 - mean_q: 295.714 Interval 2639 (1319000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3276 6 episodes - episode_reward: -362.624 [-565.042, -100.000] - loss: 510.201 - mae: 291.612 - mean_q: 308.857 Interval 2640 (1319500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0753 5 episodes - episode_reward: -290.553 [-412.728, -120.729] - loss: 489.430 - mae: 293.944 - mean_q: 311.016 Interval 2641 (1320000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2458 6 episodes - 
episode_reward: -273.488 [-516.954, -123.774] - loss: 547.764 - mae: 297.639 - mean_q: 313.391 Interval 2642 (1320500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9499 5 episodes - episode_reward: -398.984 [-696.664, -142.467] - loss: 580.877 - mae: 297.152 - mean_q: 309.879 Interval 2643 (1321000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5075 4 episodes - episode_reward: -449.998 [-569.859, -303.425] - loss: 604.389 - mae: 303.291 - mean_q: 318.581 Interval 2644 (1321500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7475 3 episodes - episode_reward: -432.534 [-849.057, -202.170] - loss: 648.388 - mae: 304.660 - mean_q: 318.827 Interval 2645 (1322000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7930 3 episodes - episode_reward: -240.282 [-272.026, -205.080] - loss: 605.318 - mae: 304.392 - mean_q: 319.231 Interval 2646 (1322500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6499 5 episodes - episode_reward: -311.413 [-448.299, -201.091] - loss: 631.699 - mae: 306.207 - mean_q: 323.016 Interval 2647 (1323000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6242 3 episodes - episode_reward: -381.500 [-413.464, -324.454] - loss: 580.879 - mae: 303.055 - mean_q: 318.551 Interval 2648 (1323500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5825 2 episodes - episode_reward: -701.878 [-865.655, -538.101] - loss: 658.695 - mae: 302.043 - mean_q: 319.533 Interval 2649 (1324000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3490 1 episodes - episode_reward: -681.088 [-681.088, -681.088] - loss: 623.734 - mae: 299.545 - mean_q: 318.064 Interval 2650 (1324500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0273 2 episodes - 
episode_reward: -448.923 [-565.735, -332.111] - loss: 623.241 - mae: 302.645 - mean_q: 326.997 Interval 2651 (1325000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7517 2 episodes - episode_reward: -783.658 [-904.151, -663.164] - loss: 599.599 - mae: 307.876 - mean_q: 335.957 Interval 2652 (1325500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3150 3 episodes - episode_reward: -357.445 [-452.967, -258.708] - loss: 619.878 - mae: 310.374 - mean_q: 340.372 Interval 2653 (1326000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9264 5 episodes - episode_reward: -418.172 [-736.753, -223.747] - loss: 684.796 - mae: 319.759 - mean_q: 359.085 Interval 2654 (1326500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2602 6 episodes - episode_reward: -428.706 [-578.854, -181.144] - loss: 722.640 - mae: 327.538 - mean_q: 369.483 Interval 2655 (1327000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9709 5 episodes - episode_reward: -360.237 [-529.013, -185.982] - loss: 775.541 - mae: 345.417 - mean_q: 396.134 Interval 2656 (1327500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2710 4 episodes - episode_reward: -410.594 [-480.512, -325.836] - loss: 795.577 - mae: 346.773 - mean_q: 400.005 Interval 2657 (1328000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2247 1 episodes - episode_reward: -718.115 [-718.115, -718.115] - loss: 884.539 - mae: 360.686 - mean_q: 420.328 Interval 2658 (1328500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5444 2 episodes - episode_reward: -637.893 [-686.473, -589.312] - loss: 847.134 - mae: 375.282 - mean_q: 446.733 Interval 2659 (1329000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0492 2 episodes - 
episode_reward: -295.096 [-296.703, -293.489] - loss: 926.734 - mae: 394.291 - mean_q: 473.338 Interval 2660 (1329500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3065 Interval 2661 (1330000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7963 4 episodes - episode_reward: -270.773 [-393.634, -179.916] - loss: 874.064 - mae: 426.565 - mean_q: 520.122 Interval 2662 (1330500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8356 2 episodes - episode_reward: -135.343 [-148.086, -122.601] - loss: 1145.720 - mae: 440.191 - mean_q: 540.875 Interval 2663 (1331000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6978 3 episodes - episode_reward: -283.076 [-383.552, -141.785] - loss: 872.201 - mae: 462.505 - mean_q: 572.404 Interval 2664 (1331500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3084 5 episodes - episode_reward: -255.220 [-280.910, -225.347] - loss: 1028.327 - mae: 477.988 - mean_q: 590.430 Interval 2665 (1332000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2952 3 episodes - episode_reward: -228.490 [-285.300, -180.522] - loss: 991.878 - mae: 504.002 - mean_q: 625.958 Interval 2666 (1332500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6079 3 episodes - episode_reward: -232.468 [-284.535, -200.024] - loss: 1360.027 - mae: 533.462 - mean_q: 668.062 Interval 2667 (1333000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1382 4 episodes - episode_reward: -304.752 [-498.318, -186.338] - loss: 1170.418 - mae: 559.048 - mean_q: 702.008 Interval 2668 (1333500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5590 3 episodes - episode_reward: -225.740 [-296.513, -132.405] - loss: 1305.538 - mae: 588.894 - mean_q: 742.790 Interval 2669 
(1334000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3353 2 episodes - episode_reward: -252.025 [-254.132, -249.919] - loss: 1427.304 - mae: 594.731 - mean_q: 749.547 Interval 2670 (1334500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8615 3 episodes - episode_reward: -329.171 [-442.601, -222.687] - loss: 1449.390 - mae: 620.001 - mean_q: 786.613 Interval 2671 (1335000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7046 1 episodes - episode_reward: -310.077 [-310.077, -310.077] - loss: 1750.459 - mae: 635.581 - mean_q: 809.868 Interval 2672 (1335500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0565 Interval 2673 (1336000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6317 2 episodes - episode_reward: -740.968 [-1044.799, -437.137] - loss: 1668.390 - mae: 674.590 - mean_q: 866.700 Interval 2674 (1336500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3288 Interval 2675 (1337000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2097 Interval 2676 (1337500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1573 Interval 2677 (1338000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2221 Interval 2678 (1338500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2471 Interval 2679 (1339000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4667 1 episodes - episode_reward: -746.609 [-746.609, -746.609] - loss: 1630.511 - mae: 818.825 - mean_q: 1079.314 Interval 2680 (1339500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1170 Interval 2681 (1340000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 
-0.1761 Interval 2682 (1340500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2417 Interval 2683 (1341000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1185 Interval 2684 (1341500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0132 1 episodes - episode_reward: -1388.984 [-1388.984, -1388.984] - loss: 1480.861 - mae: 927.520 - mean_q: 1230.889 Interval 2685 (1342000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3486 Interval 2686 (1342500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1583 Interval 2687 (1343000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3829 Interval 2688 (1343500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1895 2 episodes - episode_reward: -442.965 [-630.813, -255.118] - loss: 2009.372 - mae: 979.871 - mean_q: 1299.599 Interval 2689 (1344000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1965 Interval 2690 (1344500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0268 Interval 2691 (1345000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0440 Interval 2692 (1345500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.6031 Interval 2693 (1346000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7570 4 episodes - episode_reward: -744.412 [-2004.897, -208.511] - loss: 1925.460 - mae: 1001.765 - mean_q: 1326.158 Interval 2694 (1346500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2946 Interval 2695 (1347000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1286 Interval 2696 (1347500 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -0.1695 Interval 2697 (1348000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2227 Interval 2698 (1348500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0876 Interval 2699 (1349000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7580 Interval 2700 (1349500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1006 1 episodes - episode_reward: -1324.752 [-1324.752, -1324.752] - loss: 1502.209 - mae: 1020.357 - mean_q: 1351.444 Interval 2701 (1350000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3060 Interval 2702 (1350500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0551 Interval 2703 (1351000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2929 Interval 2704 (1351500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1139 Interval 2705 (1352000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1990 Interval 2706 (1352500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1680 Interval 2707 (1353000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.4442 Interval 2708 (1353500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9691 3 episodes - episode_reward: -613.046 [-1250.982, -162.938] - loss: 1984.972 - mae: 976.420 - mean_q: 1293.668 Interval 2709 (1354000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7466 1 episodes - episode_reward: -229.794 [-229.794, -229.794] - loss: 1539.402 - mae: 966.619 - mean_q: 1280.712 Interval 2710 (1354500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4003 2 episodes - 
episode_reward: -619.887 [-946.310, -293.464] - loss: 1422.220 - mae: 958.441 - mean_q: 1270.604 Interval 2711 (1355000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2396 Interval 2712 (1355500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5172 1 episodes - episode_reward: -1029.414 [-1029.414, -1029.414] - loss: 1895.389 - mae: 969.659 - mean_q: 1289.376 Interval 2713 (1356000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.3750 1 episodes - episode_reward: -2165.459 [-2165.459, -2165.459] - loss: 2078.620 - mae: 968.404 - mean_q: 1287.359 Interval 2714 (1356500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9675 4 episodes - episode_reward: -185.704 [-244.991, -144.406] - loss: 4052.477 - mae: 964.718 - mean_q: 1280.847 Interval 2715 (1357000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1957 2 episodes - episode_reward: -606.308 [-895.628, -316.988] - loss: 3574.091 - mae: 973.408 - mean_q: 1295.485 Interval 2716 (1357500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7913 3 episodes - episode_reward: -335.840 [-815.830, -82.871] - loss: 2857.920 - mae: 980.306 - mean_q: 1305.363 Interval 2717 (1358000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5289 4 episodes - episode_reward: -318.270 [-796.618, -110.346] - loss: 7901.992 - mae: 977.392 - mean_q: 1300.168 Interval 2718 (1358500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9732 2 episodes - episode_reward: -222.149 [-293.017, -151.281] - loss: 4186.792 - mae: 981.462 - mean_q: 1306.619 Interval 2719 (1359000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7775 3 episodes - episode_reward: -460.264 [-756.839, -50.998] - loss: 6205.623 - mae: 990.744 - mean_q: 1318.497 
Interval 2720 (1359500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9882 3 episodes - episode_reward: -170.915 [-210.791, -132.785] - loss: 3700.468 - mae: 1023.655 - mean_q: 1365.193 Interval 2721 (1360000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9266 5 episodes - episode_reward: -400.046 [-813.912, -107.887] - loss: 7065.211 - mae: 1011.569 - mean_q: 1346.098 Interval 2722 (1360500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6744 5 episodes - episode_reward: -165.950 [-329.454, -100.000] - loss: 5704.221 - mae: 1030.576 - mean_q: 1372.816 Interval 2723 (1361000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4064 5 episodes - episode_reward: -143.229 [-261.440, -86.047] - loss: 6094.908 - mae: 1038.330 - mean_q: 1382.449 Interval 2724 (1361500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8599 3 episodes - episode_reward: -277.495 [-399.837, -107.465] - loss: 6979.603 - mae: 1060.060 - mean_q: 1413.195 Interval 2725 (1362000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8495 5 episodes - episode_reward: -414.085 [-692.684, -116.487] - loss: 8463.766 - mae: 1073.315 - mean_q: 1435.913 Interval 2726 (1362500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6017 2 episodes - episode_reward: -374.515 [-649.270, -99.761] - loss: 7196.701 - mae: 1107.390 - mean_q: 1484.566 Interval 2727 (1363000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8739 4 episodes - episode_reward: -216.078 [-466.114, -9.167] - loss: 9925.724 - mae: 1131.398 - mean_q: 1516.814 Interval 2728 (1363500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0507 3 episodes - episode_reward: -458.950 [-728.525, -309.292] - loss: 16553.887 - mae: 1170.732 - 
mean_q: 1566.933 Interval 2729 (1364000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0577 5 episodes - episode_reward: -344.046 [-706.010, -98.973] - loss: 11078.077 - mae: 1185.460 - mean_q: 1588.686 Interval 2730 (1364500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0232 3 episodes - episode_reward: -356.987 [-424.635, -282.223] - loss: 8157.424 - mae: 1201.693 - mean_q: 1608.323 Interval 2731 (1365000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.9117 6 episodes - episode_reward: -478.542 [-622.735, -323.138] - loss: 7120.211 - mae: 1216.688 - mean_q: 1631.132 Interval 2732 (1365500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0031 2 episodes - episode_reward: -570.670 [-651.304, -490.035] - loss: 9034.879 - mae: 1225.910 - mean_q: 1643.675 Interval 2733 (1366000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6739 1 episodes - episode_reward: -839.534 [-839.534, -839.534] - loss: 6370.822 - mae: 1244.623 - mean_q: 1670.345 Interval 2734 (1366500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1386 3 episodes - episode_reward: -466.888 [-748.563, -130.194] - loss: 6149.362 - mae: 1241.236 - mean_q: 1666.088 Interval 2735 (1367000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6398 2 episodes - episode_reward: -682.329 [-857.806, -506.853] - loss: 3923.412 - mae: 1256.240 - mean_q: 1684.340 Interval 2736 (1367500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4048 5 episodes - episode_reward: -447.529 [-679.233, -96.440] - loss: 5324.895 - mae: 1244.474 - mean_q: 1667.177 Interval 2737 (1368000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6818 3 episodes - episode_reward: -613.410 [-875.681, -102.864] - loss: 3338.446 - 
mae: 1249.064 - mean_q: 1673.732 Interval 2738 (1368500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8732 2 episodes - episode_reward: -722.344 [-850.412, -594.276] - loss: 3625.262 - mae: 1232.049 - mean_q: 1649.170 Interval 2739 (1369000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9528 3 episodes - episode_reward: -766.526 [-775.681, -754.292] - loss: 2943.550 - mae: 1223.531 - mean_q: 1637.292 Interval 2740 (1369500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2870 2 episodes - episode_reward: -620.701 [-637.215, -604.186] - loss: 3115.386 - mae: 1225.338 - mean_q: 1639.106 Interval 2741 (1370000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0552 1 episodes - episode_reward: -719.882 [-719.882, -719.882] - loss: 2260.365 - mae: 1206.351 - mean_q: 1612.764 Interval 2742 (1370500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2252 1 episodes - episode_reward: -1044.091 [-1044.091, -1044.091] - loss: 2705.728 - mae: 1205.375 - mean_q: 1611.923 Interval 2743 (1371000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5291 3 episodes - episode_reward: -644.645 [-900.442, -191.220] - loss: 2267.572 - mae: 1200.628 - mean_q: 1607.052 Interval 2744 (1371500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4904 3 episodes - episode_reward: -633.924 [-752.738, -403.155] - loss: 1822.207 - mae: 1199.267 - mean_q: 1604.706 Interval 2745 (1372000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0194 2 episodes - episode_reward: -514.497 [-791.165, -237.829] - loss: 1813.280 - mae: 1193.731 - mean_q: 1595.533 Interval 2746 (1372500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5235 2 episodes - episode_reward: -438.174 [-677.535, -198.812] 
- loss: 2425.004 - mae: 1212.507 - mean_q: 1619.719 Interval 2747 (1373000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3807 1 episodes - episode_reward: -145.549 [-145.549, -145.549] - loss: 1702.891 - mae: 1212.364 - mean_q: 1617.609 Interval 2748 (1373500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1652 Interval 2749 (1374000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1708 Interval 2750 (1374500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2411 Interval 2751 (1375000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2305 Interval 2752 (1375500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1411 Interval 2753 (1376000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2130 Interval 2754 (1376500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.2501 Interval 2755 (1377000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.3137 Interval 2756 (1377500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.2156 1 episodes - episode_reward: -1852.631 [-1852.631, -1852.631] - loss: 1353.082 - mae: 1090.951 - mean_q: 1447.657 Interval 2757 (1378000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2334 Interval 2758 (1378500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2229 Interval 2759 (1379000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1959 Interval 2760 (1379500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1175 Interval 2761 (1380000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2748 Interval 2762 (1380500 
steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -1.0148 1 episodes - episode_reward: -1028.708 [-1028.708, -1028.708] - loss: 1045.999 - mae: 980.414 - mean_q: 1303.611 Interval 2763 (1381000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3757 Interval 2764 (1381500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3192 Interval 2765 (1382000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5260 2 episodes - episode_reward: -376.201 [-616.698, -135.704] - loss: 1085.774 - mae: 965.069 - mean_q: 1285.467 Interval 2766 (1382500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1086 4 episodes - episode_reward: -376.748 [-969.804, -117.146] - loss: 1037.493 - mae: 938.963 - mean_q: 1250.917 Interval 2767 (1383000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2214 Interval 2768 (1383500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4937 Interval 2769 (1384000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0055 Interval 2770 (1384500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2967 Interval 2771 (1385000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7451 1 episodes - episode_reward: -797.730 [-797.730, -797.730] - loss: 871.461 - mae: 895.665 - mean_q: 1195.366 Interval 2772 (1385500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.3349 Interval 2773 (1386000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3906 Interval 2774 (1386500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2166 Interval 2775 (1387000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
-1.0456 1 episodes - episode_reward: -884.669 [-884.669, -884.669] - loss: 902.643 - mae: 857.680 - mean_q: 1141.505 Interval 2776 (1387500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7630 1 episodes - episode_reward: -1033.796 [-1033.796, -1033.796] - loss: 803.153 - mae: 847.895 - mean_q: 1127.460 Interval 2777 (1388000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8097 Interval 2778 (1388500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2314 1 episodes - episode_reward: -827.841 [-827.841, -827.841] - loss: 795.745 - mae: 819.653 - mean_q: 1089.578 Interval 2779 (1389000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2102 Interval 2780 (1389500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3221 Interval 2781 (1390000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6979 Interval 2782 (1390500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -9.6374 1 episodes - episode_reward: -5762.639 [-5762.639, -5762.639] - loss: 691.435 - mae: 796.915 - mean_q: 1057.082 Interval 2783 (1391000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4282 1 episodes - episode_reward: -708.382 [-708.382, -708.382] - loss: 750.272 - mae: 781.419 - mean_q: 1035.986 Interval 2784 (1391500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6727 Interval 2785 (1392000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2752 Interval 2786 (1392500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0535 Interval 2787 (1393000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3771 Interval 2788 (1393500 steps performed) 500/500 [==============================] - 
4s 7ms/step - reward: 0.0388 Interval 2789 (1394000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2089 Interval 2790 (1394500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1832 Interval 2791 (1395000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3416 Interval 2792 (1395500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9407 3 episodes - episode_reward: -481.880 [-1150.367, -140.722] - loss: 537.763 - mae: 673.321 - mean_q: 887.781 Interval 2793 (1396000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5407 Interval 2794 (1396500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4528 1 episodes - episode_reward: -709.899 [-709.899, -709.899] - loss: 492.280 - mae: 650.068 - mean_q: 856.546 Interval 2795 (1397000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7188 1 episodes - episode_reward: -567.857 [-567.857, -567.857] - loss: 496.104 - mae: 638.399 - mean_q: 841.002 Interval 2796 (1397500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3600 1 episodes - episode_reward: -1143.406 [-1143.406, -1143.406] - loss: 526.899 - mae: 624.391 - mean_q: 821.929 Interval 2797 (1398000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1569 Interval 2798 (1398500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7110 Interval 2799 (1399000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9127 3 episodes - episode_reward: -327.908 [-686.505, -111.992] - loss: 611.233 - mae: 597.318 - mean_q: 785.756 Interval 2800 (1399500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1843 Interval 2801 (1400000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -0.1619 Interval 2802 (1400500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2259 Interval 2803 (1401000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3309 Interval 2804 (1401500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7032 1 episodes - episode_reward: -851.032 [-851.032, -851.032] - loss: 591.059 - mae: 545.314 - mean_q: 714.688 Interval 2805 (1402000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9675 2 episodes - episode_reward: -263.786 [-323.824, -203.749] - loss: 644.542 - mae: 539.907 - mean_q: 707.835 Interval 2806 (1402500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0643 2 episodes - episode_reward: -192.271 [-247.233, -137.309] - loss: 624.055 - mae: 539.472 - mean_q: 706.452 Interval 2807 (1403000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3054 4 episodes - episode_reward: -317.026 [-539.437, -148.576] - loss: 629.678 - mae: 530.373 - mean_q: 693.058 Interval 2808 (1403500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9607 1 episodes - episode_reward: -322.055 [-322.055, -322.055] - loss: 623.848 - mae: 526.461 - mean_q: 688.090 Interval 2809 (1404000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5973 2 episodes - episode_reward: -455.553 [-464.076, -447.030] - loss: 637.407 - mae: 519.743 - mean_q: 678.653 Interval 2810 (1404500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8419 3 episodes - episode_reward: -298.913 [-462.898, -130.667] - loss: 668.581 - mae: 510.136 - mean_q: 666.236 Interval 2811 (1405000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3158 3 episodes - episode_reward: -366.727 [-556.352, 
-206.384] - loss: 588.127 - mae: 495.929 - mean_q: 646.149 Interval 2812 (1405500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8799 2 episodes - episode_reward: -355.258 [-434.563, -275.954] - loss: 625.631 - mae: 491.944 - mean_q: 641.293 Interval 2813 (1406000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2656 4 episodes - episode_reward: -235.032 [-682.252, 38.677] - loss: 638.217 - mae: 478.956 - mean_q: 623.211 Interval 2814 (1406500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0088 4 episodes - episode_reward: -137.448 [-254.149, -34.533] - loss: 673.223 - mae: 468.792 - mean_q: 608.800 Interval 2815 (1407000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3494 2 episodes - episode_reward: -289.721 [-614.926, 35.484] - loss: 784.913 - mae: 462.257 - mean_q: 599.268 Interval 2816 (1407500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6667 3 episodes - episode_reward: -400.694 [-576.003, -102.669] - loss: 647.938 - mae: 451.878 - mean_q: 585.262 Interval 2817 (1408000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3943 3 episodes - episode_reward: -345.672 [-460.892, -222.278] - loss: 665.336 - mae: 446.815 - mean_q: 578.344 Interval 2818 (1408500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0391 5 episodes - episode_reward: -276.819 [-574.812, -105.093] - loss: 689.833 - mae: 442.546 - mean_q: 572.446 Interval 2819 (1409000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5844 4 episodes - episode_reward: -200.929 [-485.976, -67.011] - loss: 636.555 - mae: 437.178 - mean_q: 565.070 Interval 2820 (1409500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7200 2 episodes - episode_reward: -428.503 [-593.600, -263.405] - 
loss: 607.977 - mae: 435.480 - mean_q: 562.969 Interval 2821 (1410000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9519 2 episodes - episode_reward: -248.961 [-255.504, -242.417] - loss: 590.618 - mae: 432.412 - mean_q: 558.601 Interval 2822 (1410500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3915 5 episodes - episode_reward: -333.940 [-580.925, -102.843] - loss: 644.617 - mae: 424.689 - mean_q: 547.399 Interval 2823 (1411000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4925 4 episodes - episode_reward: -194.958 [-346.313, -100.000] - loss: 639.378 - mae: 426.223 - mean_q: 549.323 Interval 2824 (1411500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2976 3 episodes - episode_reward: -547.719 [-900.985, -190.463] - loss: 625.154 - mae: 419.963 - mean_q: 541.002 Interval 2825 (1412000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2133 1 episodes - episode_reward: -99.318 [-99.318, -99.318] - loss: 597.456 - mae: 418.187 - mean_q: 538.492 Interval 2826 (1412500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8495 4 episodes - episode_reward: -340.631 [-455.411, -227.292] - loss: 571.959 - mae: 417.462 - mean_q: 538.506 Interval 2827 (1413000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7107 3 episodes - episode_reward: -635.704 [-1129.962, -383.214] - loss: 548.558 - mae: 414.071 - mean_q: 532.918 Interval 2828 (1413500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8763 3 episodes - episode_reward: -435.536 [-973.731, -55.119] - loss: 667.809 - mae: 418.003 - mean_q: 538.828 Interval 2829 (1414000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6833 4 episodes - episode_reward: -354.941 [-623.257, -162.901] - loss: 570.328 
- mae: 414.626 - mean_q: 534.428 Interval 2830 (1414500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9814 3 episodes - episode_reward: -507.970 [-941.177, -212.585] - loss: 559.056 - mae: 411.774 - mean_q: 530.415 Interval 2831 (1415000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4703 4 episodes - episode_reward: -304.899 [-454.900, -152.325] - loss: 592.089 - mae: 407.916 - mean_q: 525.007 Interval 2832 (1415500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7693 3 episodes - episode_reward: -252.013 [-426.976, -136.857] - loss: 525.771 - mae: 411.861 - mean_q: 529.931 Interval 2833 (1416000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8697 2 episodes - episode_reward: -518.856 [-916.268, -121.444] - loss: 508.313 - mae: 411.281 - mean_q: 528.523 Interval 2834 (1416500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3393 6 episodes - episode_reward: -507.656 [-935.200, -122.706] - loss: 603.541 - mae: 413.905 - mean_q: 531.255 Interval 2835 (1417000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2948 4 episodes - episode_reward: -332.894 [-512.452, -126.014] - loss: 549.570 - mae: 417.818 - mean_q: 536.094 Interval 2836 (1417500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7368 3 episodes - episode_reward: -616.840 [-1558.722, -122.163] - loss: 425.274 - mae: 415.211 - mean_q: 532.610 Interval 2837 (1418000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8172 3 episodes - episode_reward: -478.457 [-577.516, -380.562] - loss: 459.328 - mae: 417.003 - mean_q: 535.322 Interval 2838 (1418500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2245 4 episodes - episode_reward: -352.016 [-578.620, -137.399] - loss: 443.709 - mae: 
423.787 - mean_q: 543.572 Interval 2839 (1419000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2036 Interval 2840 (1419500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8142 2 episodes - episode_reward: -494.154 [-636.778, -351.530] - loss: 398.789 - mae: 441.921 - mean_q: 568.709 Interval 2841 (1420000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5424 2 episodes - episode_reward: -486.398 [-573.051, -399.745] - loss: 494.064 - mae: 440.974 - mean_q: 565.726 Interval 2842 (1420500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8386 4 episodes - episode_reward: -537.334 [-592.647, -457.782] - loss: 478.947 - mae: 435.650 - mean_q: 557.764 Interval 2843 (1421000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3632 7 episodes - episode_reward: -390.582 [-605.301, -100.000] - loss: 492.872 - mae: 434.040 - mean_q: 555.148 Interval 2844 (1421500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6382 4 episodes - episode_reward: -461.333 [-810.249, -126.155] - loss: 494.176 - mae: 430.717 - mean_q: 549.398 Interval 2845 (1422000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1677 4 episodes - episode_reward: -347.216 [-577.392, -157.433] - loss: 533.691 - mae: 427.327 - mean_q: 544.700 Interval 2846 (1422500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3317 3 episodes - episode_reward: -445.652 [-486.697, -411.673] - loss: 430.408 - mae: 430.872 - mean_q: 549.269 Interval 2847 (1423000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7933 7 episodes - episode_reward: -347.083 [-714.272, -100.000] - loss: 492.726 - mae: 426.849 - mean_q: 542.174 Interval 2848 (1423500 steps performed) 500/500 [==============================] - 2s 
4ms/step - reward: -4.4319 5 episodes - episode_reward: -433.800 [-535.384, -341.932] - loss: 445.090 - mae: 413.001 - mean_q: 522.348 Interval 2849 (1424000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9602 4 episodes - episode_reward: -620.281 [-884.817, -396.647] - loss: 570.594 - mae: 405.840 - mean_q: 510.932 Interval 2850 (1424500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2271 6 episodes - episode_reward: -443.947 [-706.682, -197.506] - loss: 459.025 - mae: 391.449 - mean_q: 489.375 Interval 2851 (1425000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7938 3 episodes - episode_reward: -701.728 [-767.311, -588.741] - loss: 443.820 - mae: 387.680 - mean_q: 483.820 Interval 2852 (1425500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7528 5 episodes - episode_reward: -486.180 [-580.550, -224.567] - loss: 397.897 - mae: 382.017 - mean_q: 475.943 Interval 2853 (1426000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.8182 6 episodes - episode_reward: -521.814 [-776.142, -111.241] - loss: 412.629 - mae: 365.837 - mean_q: 452.980 Interval 2854 (1426500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3582 4 episodes - episode_reward: -510.825 [-758.078, -136.785] - loss: 493.042 - mae: 359.594 - mean_q: 443.609 Interval 2855 (1427000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.1600 5 episodes - episode_reward: -532.119 [-672.130, -356.344] - loss: 509.405 - mae: 357.474 - mean_q: 439.889 Interval 2856 (1427500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4878 5 episodes - episode_reward: -554.807 [-745.343, -175.316] - loss: 459.733 - mae: 359.334 - mean_q: 443.859 Interval 2857 (1428000 steps performed) 500/500 [==============================] - 2s 5ms/step - 
reward: -3.1508 3 episodes - episode_reward: -498.555 [-722.438, -234.734] - loss: 676.045 - mae: 358.027 - mean_q: 442.059 Interval 2858 (1428500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0680 5 episodes - episode_reward: -335.798 [-495.141, -155.776] - loss: 860.643 - mae: 362.619 - mean_q: 449.805 Interval 2859 (1429000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3066 6 episodes - episode_reward: -267.782 [-475.858, -80.476] - loss: 992.463 - mae: 367.239 - mean_q: 454.274 Interval 2860 (1429500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6388 6 episodes - episode_reward: -243.492 [-476.373, -43.506] - loss: 690.942 - mae: 363.568 - mean_q: 448.365 Interval 2861 (1430000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6193 5 episodes - episode_reward: -523.128 [-1149.546, -324.802] - loss: 664.596 - mae: 362.155 - mean_q: 446.411 Interval 2862 (1430500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0555 4 episodes - episode_reward: -223.338 [-344.442, -34.096] - loss: 971.916 - mae: 358.034 - mean_q: 439.637 Interval 2863 (1431000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0339 6 episodes - episode_reward: -286.062 [-734.733, -74.443] - loss: 980.763 - mae: 363.681 - mean_q: 446.391 Interval 2864 (1431500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4528 5 episodes - episode_reward: -240.848 [-400.969, -113.527] - loss: 726.277 - mae: 346.559 - mean_q: 421.027 Interval 2865 (1432000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9271 3 episodes - episode_reward: -139.865 [-164.853, -107.952] - loss: 866.277 - mae: 352.796 - mean_q: 430.175 Interval 2866 (1432500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 
-3.7934 4 episodes - episode_reward: -397.742 [-569.009, -307.031] - loss: 850.718 - mae: 354.077 - mean_q: 429.999 Interval 2867 (1433000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6827 5 episodes - episode_reward: -508.692 [-859.123, -85.806] - loss: 605.404 - mae: 352.795 - mean_q: 427.149 Interval 2868 (1433500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4466 3 episodes - episode_reward: -401.942 [-514.015, -198.611] - loss: 922.392 - mae: 347.613 - mean_q: 419.127 Interval 2869 (1434000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4610 3 episodes - episode_reward: -479.328 [-610.106, -267.606] - loss: 589.727 - mae: 337.833 - mean_q: 403.091 Interval 2870 (1434500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2725 6 episodes - episode_reward: -245.667 [-570.791, -91.569] - loss: 763.379 - mae: 331.120 - mean_q: 392.147 Interval 2871 (1435000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2013 3 episodes - episode_reward: -544.680 [-782.529, -334.930] - loss: 998.131 - mae: 320.464 - mean_q: 375.911 Interval 2872 (1435500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2542 1 episodes - episode_reward: -746.952 [-746.952, -746.952] - loss: 656.901 - mae: 309.735 - mean_q: 361.116 Interval 2873 (1436000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1042 2 episodes - episode_reward: -348.688 [-433.434, -263.942] - loss: 665.997 - mae: 307.492 - mean_q: 357.329 Interval 2874 (1436500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2152 Interval 2875 (1437000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6919 1 episodes - episode_reward: -1212.911 [-1212.911, -1212.911] - loss: 409.116 - mae: 304.003 - mean_q: 350.804 
Interval 2876 (1437500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1595 2 episodes - episode_reward: -535.804 [-751.767, -319.841] - loss: 898.553 - mae: 300.193 - mean_q: 345.203 Interval 2877 (1438000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8999 2 episodes - episode_reward: -401.955 [-703.909, -100.000] - loss: 562.879 - mae: 289.169 - mean_q: 328.565 Interval 2878 (1438500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.5346 4 episodes - episode_reward: -840.009 [-1732.645, -480.790] - loss: 393.728 - mae: 283.556 - mean_q: 319.384 Interval 2879 (1439000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6691 4 episodes - episode_reward: -424.233 [-746.396, -100.000] - loss: 346.049 - mae: 269.280 - mean_q: 298.192 Interval 2880 (1439500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9287 3 episodes - episode_reward: -697.041 [-883.861, -447.132] - loss: 378.370 - mae: 262.051 - mean_q: 288.195 Interval 2881 (1440000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5493 1 episodes - episode_reward: -608.219 [-608.219, -608.219] - loss: 513.930 - mae: 245.120 - mean_q: 263.889 Interval 2882 (1440500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7840 2 episodes - episode_reward: -859.066 [-1252.327, -465.805] - loss: 379.425 - mae: 239.465 - mean_q: 256.728 Interval 2883 (1441000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1245 2 episodes - episode_reward: -442.589 [-517.374, -367.804] - loss: 490.121 - mae: 236.638 - mean_q: 252.764 Interval 2884 (1441500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3188 2 episodes - episode_reward: -873.074 [-1230.302, -515.847] - loss: 351.227 - mae: 233.015 - mean_q: 246.787 
Interval 2885 (1442000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3430 3 episodes - episode_reward: -666.200 [-838.151, -420.009] - loss: 299.413 - mae: 229.620 - mean_q: 242.575 Interval 2886 (1442500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0208 5 episodes - episode_reward: -541.708 [-723.954, -284.951] - loss: 660.841 - mae: 225.123 - mean_q: 235.931 Interval 2887 (1443000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9046 3 episodes - episode_reward: -600.117 [-771.795, -484.964] - loss: 296.723 - mae: 212.105 - mean_q: 215.264 Interval 2888 (1443500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6276 5 episodes - episode_reward: -384.114 [-643.074, -225.050] - loss: 331.516 - mae: 206.898 - mean_q: 208.093 Interval 2889 (1444000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1702 3 episodes - episode_reward: -879.314 [-919.462, -807.731] - loss: 318.809 - mae: 201.032 - mean_q: 197.807 Interval 2890 (1444500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6376 3 episodes - episode_reward: -459.804 [-671.786, -187.614] - loss: 359.797 - mae: 196.833 - mean_q: 191.906 Interval 2891 (1445000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4455 7 episodes - episode_reward: -314.358 [-552.568, -122.473] - loss: 331.611 - mae: 185.593 - mean_q: 174.153 Interval 2892 (1445500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4629 2 episodes - episode_reward: -480.117 [-533.683, -426.550] - loss: 274.161 - mae: 178.616 - mean_q: 163.752 Interval 2893 (1446000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5120 3 episodes - episode_reward: -504.874 [-693.600, -315.949] - loss: 361.608 - mae: 174.905 - mean_q: 157.047 Interval 
2894 (1446500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7268 3 episodes - episode_reward: -501.397 [-740.541, -331.118] - loss: 338.440 - mae: 169.394 - mean_q: 149.484 Interval 2895 (1447000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.4342 8 episodes - episode_reward: -567.570 [-1157.785, -206.041] - loss: 364.065 - mae: 166.695 - mean_q: 145.909 Interval 2896 (1447500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.8979 4 episodes - episode_reward: -705.533 [-974.719, -494.025] - loss: 345.906 - mae: 162.911 - mean_q: 140.439 Interval 2897 (1448000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.5159 2 episodes - episode_reward: -1473.058 [-1979.304, -966.812] - loss: 440.012 - mae: 162.588 - mean_q: 138.483 Interval 2898 (1448500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4409 1 episodes - episode_reward: -654.810 [-654.810, -654.810] - loss: 317.491 - mae: 158.636 - mean_q: 133.162 Interval 2899 (1449000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9909 3 episodes - episode_reward: -431.838 [-658.912, -302.298] - loss: 357.281 - mae: 158.645 - mean_q: 132.062 Interval 2900 (1449500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2434 2 episodes - episode_reward: -620.870 [-903.304, -338.436] - loss: 309.974 - mae: 157.935 - mean_q: 129.553 Interval 2901 (1450000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8567 2 episodes - episode_reward: -837.031 [-1100.840, -573.222] - loss: 312.727 - mae: 158.925 - mean_q: 128.683 Interval 2902 (1450500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0680 4 episodes - episode_reward: -735.455 [-1114.079, -478.366] - loss: 413.873 - mae: 161.110 - mean_q: 130.556 Interval 2903 
(1451000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.1008 6 episodes - episode_reward: -413.275 [-849.909, -127.769] - loss: 349.096 - mae: 159.132 - mean_q: 127.259 Interval 2904 (1451500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.2026 7 episodes - episode_reward: -439.356 [-670.658, -130.962] - loss: 347.041 - mae: 157.065 - mean_q: 122.809 Interval 2905 (1452000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9814 6 episodes - episode_reward: -411.691 [-799.857, -100.000] - loss: 358.196 - mae: 149.392 - mean_q: 109.449 Interval 2906 (1452500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7638 4 episodes - episode_reward: -708.750 [-718.862, -687.197] - loss: 313.983 - mae: 147.769 - mean_q: 105.216 Interval 2907 (1453000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1180 5 episodes - episode_reward: -417.980 [-787.908, -245.631] - loss: 325.233 - mae: 147.036 - mean_q: 103.018 Interval 2908 (1453500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8946 4 episodes - episode_reward: -486.400 [-717.273, -237.237] - loss: 244.810 - mae: 146.075 - mean_q: 101.075 Interval 2909 (1454000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3222 4 episodes - episode_reward: -654.549 [-825.431, -333.859] - loss: 306.135 - mae: 143.421 - mean_q: 97.178 Interval 2910 (1454500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2128 5 episodes - episode_reward: -450.741 [-650.346, -262.178] - loss: 322.042 - mae: 140.041 - mean_q: 89.960 Interval 2911 (1455000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3525 3 episodes - episode_reward: -646.628 [-867.634, -421.662] - loss: 320.167 - mae: 144.647 - mean_q: 97.108 Interval 2912 (1455500 
steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1291 1 episodes - episode_reward: -473.258 [-473.258, -473.258] - loss: 341.457 - mae: 147.050 - mean_q: 101.056 Interval 2913 (1456000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9453 4 episodes - episode_reward: -850.313 [-2436.993, -205.353] - loss: 355.783 - mae: 149.683 - mean_q: 105.866 Interval 2914 (1456500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -10.1234 2 episodes - episode_reward: -1345.078 [-2069.599, -620.558] - loss: 397.175 - mae: 150.959 - mean_q: 108.036 Interval 2915 (1457000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.4056 4 episodes - episode_reward: -1607.839 [-3294.736, -280.697] - loss: 379.331 - mae: 155.411 - mean_q: 114.214 Interval 2916 (1457500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7387 5 episodes - episode_reward: -486.269 [-657.823, -326.338] - loss: 432.149 - mae: 163.178 - mean_q: 126.982 Interval 2917 (1458000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4547 4 episodes - episode_reward: -501.408 [-896.150, -341.210] - loss: 406.506 - mae: 161.516 - mean_q: 124.102 Interval 2918 (1458500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0443 6 episodes - episode_reward: -364.265 [-560.196, -118.704] - loss: 696.237 - mae: 164.864 - mean_q: 129.212 Interval 2919 (1459000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3311 2 episodes - episode_reward: -728.989 [-891.279, -566.700] - loss: 411.658 - mae: 178.084 - mean_q: 149.042 Interval 2920 (1459500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.8643 4 episodes - episode_reward: -692.894 [-1165.498, -287.754] - loss: 396.015 - mae: 177.112 - mean_q: 147.347 Interval 2921 (1460000 
steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7474 4 episodes - episode_reward: -513.939 [-691.204, -100.000] - loss: 467.375 - mae: 181.593 - mean_q: 153.181 Interval 2922 (1460500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3516 4 episodes - episode_reward: -430.844 [-549.245, -337.185] - loss: 535.686 - mae: 184.707 - mean_q: 158.440 Interval 2923 (1461000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2377 4 episodes - episode_reward: -531.108 [-725.829, -288.586] - loss: 701.856 - mae: 186.251 - mean_q: 159.388 Interval 2924 (1461500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.0816 4 episodes - episode_reward: -526.924 [-628.612, -403.681] - loss: 514.445 - mae: 192.213 - mean_q: 168.590 Interval 2925 (1462000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -9.5942 3 episodes - episode_reward: -1972.640 [-3926.182, -351.742] - loss: 535.594 - mae: 193.289 - mean_q: 168.253 Interval 2926 (1462500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.0201 7 episodes - episode_reward: -483.143 [-807.031, -252.018] - loss: 546.360 - mae: 194.709 - mean_q: 170.920 Interval 2927 (1463000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.0896 4 episodes - episode_reward: -780.052 [-1445.616, -503.185] - loss: 645.021 - mae: 193.438 - mean_q: 168.203 Interval 2928 (1463500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.7673 10 episodes - episode_reward: -383.860 [-726.762, -150.480] - loss: 596.034 - mae: 193.919 - mean_q: 168.215 Interval 2929 (1464000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3290 3 episodes - episode_reward: -504.334 [-623.559, -375.448] - loss: 631.365 - mae: 198.206 - mean_q: 174.609 Interval 2930 (1464500 steps 
performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0603 4 episodes - episode_reward: -387.410 [-674.129, -98.636] - loss: 656.670 - mae: 195.503 - mean_q: 170.170 Interval 2931 (1465000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.0824 6 episodes - episode_reward: -630.735 [-1648.381, -382.657] - loss: 683.752 - mae: 195.065 - mean_q: 170.607 Interval 2932 (1465500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6387 1 episodes - episode_reward: -595.647 [-595.647, -595.647] - loss: 714.888 - mae: 196.831 - mean_q: 173.875 Interval 2933 (1466000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -9.6850 9 episodes - episode_reward: -602.144 [-2482.088, -98.076] - loss: 656.947 - mae: 195.031 - mean_q: 171.488 Interval 2934 (1466500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.2469 4 episodes - episode_reward: -784.086 [-1757.743, -420.438] - loss: 680.631 - mae: 195.536 - mean_q: 173.339 Interval 2935 (1467000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4104 5 episodes - episode_reward: -360.106 [-610.201, -136.181] - loss: 683.852 - mae: 193.199 - mean_q: 169.034 Interval 2936 (1467500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8549 8 episodes - episode_reward: -176.446 [-330.418, -117.719] - loss: 826.528 - mae: 195.960 - mean_q: 173.881 Interval 2937 (1468000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7865 3 episodes - episode_reward: -336.685 [-610.948, -158.254] - loss: 699.089 - mae: 207.621 - mean_q: 193.210 Interval 2938 (1468500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8266 9 episodes - episode_reward: -197.480 [-622.749, -92.258] - loss: 723.089 - mae: 197.707 - mean_q: 175.907 Interval 2939 (1469000 steps performed) 
500/500 [==============================] - 2s 5ms/step - reward: -2.1726 4 episodes - episode_reward: -169.987 [-212.050, -100.000] - loss: 695.166 - mae: 193.133 - mean_q: 167.777 Interval 2940 (1469500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2687 3 episodes - episode_reward: -475.328 [-741.538, -135.678] - loss: 825.235 - mae: 201.340 - mean_q: 180.268 Interval 2941 (1470000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8528 2 episodes - episode_reward: -375.708 [-643.108, -108.308] - loss: 941.233 - mae: 200.528 - mean_q: 178.223 Interval 2942 (1470500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4149 6 episodes - episode_reward: -259.544 [-464.135, -171.851] - loss: 935.727 - mae: 201.262 - mean_q: 179.966 Interval 2943 (1471000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7124 6 episodes - episode_reward: -185.558 [-208.716, -143.355] - loss: 1087.572 - mae: 212.493 - mean_q: 196.088 Interval 2944 (1471500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3807 3 episodes - episode_reward: -405.939 [-831.893, -144.393] - loss: 1479.091 - mae: 220.765 - mean_q: 206.657 Interval 2945 (1472000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0259 4 episodes - episode_reward: -301.374 [-662.509, -162.858] - loss: 1247.803 - mae: 235.892 - mean_q: 230.242 Interval 2946 (1472500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7416 8 episodes - episode_reward: -157.014 [-208.389, -111.058] - loss: 1466.920 - mae: 241.359 - mean_q: 237.983 Interval 2947 (1473000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4599 3 episodes - episode_reward: -458.144 [-622.114, -159.113] - loss: 1783.708 - mae: 270.728 - mean_q: 281.355 Interval 2948 (1473500 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -2.2918 4 episodes - episode_reward: -196.270 [-246.980, -102.043] - loss: 1715.951 - mae: 299.277 - mean_q: 322.529 Interval 2949 (1474000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0009 3 episodes - episode_reward: -396.446 [-714.888, -196.133] - loss: 1987.552 - mae: 335.798 - mean_q: 373.956 Interval 2950 (1474500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6023 3 episodes - episode_reward: -321.786 [-634.450, -161.776] - loss: 2621.883 - mae: 379.834 - mean_q: 435.167 Interval 2951 (1475000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9435 2 episodes - episode_reward: -439.425 [-742.541, -136.309] - loss: 2537.259 - mae: 422.034 - mean_q: 495.992 Interval 2952 (1475500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0212 3 episodes - episode_reward: -251.826 [-290.857, -225.747] - loss: 3577.236 - mae: 484.178 - mean_q: 582.576 Interval 2953 (1476000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.8204 2 episodes - episode_reward: -734.006 [-1287.768, -180.245] - loss: 3379.791 - mae: 533.826 - mean_q: 652.345 Interval 2954 (1476500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2426 3 episodes - episode_reward: -1129.730 [-1737.284, -183.702] - loss: 3810.288 - mae: 594.056 - mean_q: 738.613 Interval 2955 (1477000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4319 5 episodes - episode_reward: -169.003 [-249.700, -100.000] - loss: 3264.307 - mae: 652.847 - mean_q: 820.514 Interval 2956 (1477500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0783 2 episodes - episode_reward: -1080.781 [-1929.969, -231.592] - loss: 3867.060 - mae: 677.980 - mean_q: 853.253 Interval 2957 (1478000 steps 
performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4506 4 episodes - episode_reward: -625.379 [-1865.528, -155.255] - loss: 3479.768 - mae: 721.771 - mean_q: 913.247 Interval 2958 (1478500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7053 3 episodes - episode_reward: -211.046 [-270.914, -137.072] - loss: 3630.436 - mae: 770.017 - mean_q: 978.637 Interval 2959 (1479000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9959 6 episodes - episode_reward: -442.305 [-1771.513, -114.268] - loss: 4532.714 - mae: 822.702 - mean_q: 1049.091 Interval 2960 (1479500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4606 6 episodes - episode_reward: -169.200 [-211.810, -138.918] - loss: 4426.502 - mae: 865.148 - mean_q: 1107.549 Interval 2961 (1480000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0757 4 episodes - episode_reward: -533.350 [-1718.174, -112.627] - loss: 4645.967 - mae: 872.678 - mean_q: 1115.147 Interval 2962 (1480500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6088 4 episodes - episode_reward: -606.757 [-1825.133, -100.000] - loss: 5552.404 - mae: 898.398 - mean_q: 1148.529 Interval 2963 (1481000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4480 9 episodes - episode_reward: -183.250 [-208.223, -127.761] - loss: 6025.448 - mae: 924.527 - mean_q: 1184.513 Interval 2964 (1481500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7901 2 episodes - episode_reward: -1124.470 [-2028.059, -220.881] - loss: 3914.341 - mae: 933.187 - mean_q: 1197.772 Interval 2965 (1482000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9182 1 episodes - episode_reward: -1925.457 [-1925.457, -1925.457] - loss: 5472.224 - mae: 953.968 - mean_q: 1224.119 Interval 2966 
(1482500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8665 1 episodes - episode_reward: -1902.784 [-1902.784, -1902.784] - loss: 5572.254 - mae: 996.968 - mean_q: 1285.060 Interval 2967 (1483000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2299 3 episodes - episode_reward: -659.334 [-1778.001, -100.000] - loss: 6208.101 - mae: 1042.572 - mean_q: 1347.447 Interval 2968 (1483500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.1029 3 episodes - episode_reward: -1172.505 [-1715.701, -183.054] - loss: 6051.194 - mae: 1092.230 - mean_q: 1416.211 Interval 2969 (1484000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5869 3 episodes - episode_reward: -207.952 [-252.419, -144.185] - loss: 5896.783 - mae: 1113.286 - mean_q: 1440.192 Interval 2970 (1484500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7060 4 episodes - episode_reward: -753.339 [-1494.521, -164.255] - loss: 6183.370 - mae: 1151.727 - mean_q: 1496.390 Interval 2971 (1485000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3063 4 episodes - episode_reward: -399.837 [-1117.565, -100.000] - loss: 5584.435 - mae: 1171.768 - mean_q: 1524.587 Interval 2972 (1485500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6566 1 episodes - episode_reward: -920.762 [-920.762, -920.762] - loss: 4926.961 - mae: 1205.503 - mean_q: 1572.307 Interval 2973 (1486000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0504 4 episodes - episode_reward: -284.603 [-740.639, -100.000] - loss: 6579.342 - mae: 1223.173 - mean_q: 1595.905 Interval 2974 (1486500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1920 2 episodes - episode_reward: -366.992 [-545.931, -188.053] - loss: 6030.168 - mae: 1225.866 - mean_q: 
1597.350 Interval 2975 (1487000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1736 3 episodes - episode_reward: -602.885 [-1167.507, -131.684] - loss: 5648.300 - mae: 1268.545 - mean_q: 1657.358 Interval 2976 (1487500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8904 5 episodes - episode_reward: -335.294 [-404.886, -258.601] - loss: 7309.351 - mae: 1288.443 - mean_q: 1684.004 Interval 2977 (1488000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4316 2 episodes - episode_reward: -751.455 [-1087.936, -414.975] - loss: 5338.692 - mae: 1281.765 - mean_q: 1676.596 Interval 2978 (1488500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -10.3200 2 episodes - episode_reward: -2796.373 [-5156.468, -436.278] - loss: 7010.241 - mae: 1332.856 - mean_q: 1745.994 Interval 2979 (1489000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6001 5 episodes - episode_reward: -437.623 [-639.696, -318.448] - loss: 5820.616 - mae: 1330.757 - mean_q: 1742.376 Interval 2980 (1489500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8872 1 episodes - episode_reward: -1874.137 [-1874.137, -1874.137] - loss: 6887.504 - mae: 1371.204 - mean_q: 1794.750 Interval 2981 (1490000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1218 3 episodes - episode_reward: -749.329 [-1851.488, -174.405] - loss: 6071.441 - mae: 1339.357 - mean_q: 1749.383 Interval 2982 (1490500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.1733 2 episodes - episode_reward: -870.148 [-1320.519, -419.778] - loss: 5017.621 - mae: 1371.986 - mean_q: 1799.781 Interval 2983 (1491000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.1068 4 episodes - episode_reward: -741.122 [-2162.234, -130.402] - loss: 
5242.805 - mae: 1349.708 - mean_q: 1774.454 Interval 2984 (1491500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1359 2 episodes - episode_reward: -690.997 [-880.884, -501.109] - loss: 9139.596 - mae: 1347.439 - mean_q: 1777.015 Interval 2985 (1492000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0138 1 episodes - episode_reward: -1137.587 [-1137.587, -1137.587] - loss: 6082.553 - mae: 1368.115 - mean_q: 1808.330 Interval 2986 (1492500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6953 2 episodes - episode_reward: -638.004 [-675.596, -600.412] - loss: 6427.876 - mae: 1384.943 - mean_q: 1826.055 Interval 2987 (1493000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1309 4 episodes - episode_reward: -421.325 [-588.531, -146.245] - loss: 5978.370 - mae: 1389.920 - mean_q: 1830.919 Interval 2988 (1493500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4319 5 episodes - episode_reward: -296.643 [-487.679, -134.220] - loss: 7947.732 - mae: 1363.269 - mean_q: 1791.575 Interval 2989 (1494000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6491 3 episodes - episode_reward: -488.383 [-651.910, -264.648] - loss: 5768.372 - mae: 1342.647 - mean_q: 1761.858 Interval 2990 (1494500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5733 2 episodes - episode_reward: -397.654 [-681.430, -113.877] - loss: 4514.306 - mae: 1303.164 - mean_q: 1709.468 Interval 2991 (1495000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6580 1 episodes - episode_reward: -771.035 [-771.035, -771.035] - loss: 5516.853 - mae: 1306.791 - mean_q: 1713.262 Interval 2992 (1495500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -10.0446 Interval 2993 (1496000 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -5.1425 4 episodes - episode_reward: -1871.328 [-5949.553, -96.931] - loss: 5503.539 - mae: 1239.582 - mean_q: 1621.489 Interval 2994 (1496500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7559 2 episodes - episode_reward: -750.314 [-1365.460, -135.168] - loss: 4217.819 - mae: 1212.242 - mean_q: 1585.344 Interval 2995 (1497000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8480 4 episodes - episode_reward: -614.416 [-1591.222, -115.325] - loss: 4079.284 - mae: 1183.007 - mean_q: 1543.729 Interval 2996 (1497500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0715 4 episodes - episode_reward: -260.228 [-549.094, -100.000] - loss: 4103.905 - mae: 1161.349 - mean_q: 1516.791 Interval 2997 (1498000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -10.7101 1 episodes - episode_reward: -4840.114 [-4840.114, -4840.114] - loss: 3696.582 - mae: 1133.251 - mean_q: 1476.967 Interval 2998 (1498500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2012 1 episodes - episode_reward: -1320.468 [-1320.468, -1320.468] - loss: 3592.413 - mae: 1104.263 - mean_q: 1437.572 Interval 2999 (1499000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7125 2 episodes - episode_reward: -1614.002 [-1655.129, -1572.875] - loss: 3472.776 - mae: 1087.757 - mean_q: 1413.352 Interval 3000 (1499500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7647 1 episodes - episode_reward: -310.709 [-310.709, -310.709] - loss: 3459.075 - mae: 1082.467 - mean_q: 1404.689 Interval 3001 (1500000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4272 2 episodes - episode_reward: -1128.786 [-1920.391, -337.181] - loss: 3185.610 - mae: 1046.303 - mean_q: 1356.070 Interval 
3002 (1500500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5479 4 episodes - episode_reward: -598.616 [-1245.922, -167.974] - loss: 2767.439 - mae: 1017.768 - mean_q: 1316.056 Interval 3003 (1501000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.1646 2 episodes - episode_reward: -1653.101 [-3000.849, -305.352] - loss: 2788.753 - mae: 1007.844 - mean_q: 1302.430 Interval 3004 (1501500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.0639 2 episodes - episode_reward: -1438.991 [-2363.451, -514.531] - loss: 3113.182 - mae: 998.236 - mean_q: 1286.340 Interval 3005 (1502000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.3808 4 episodes - episode_reward: -1136.162 [-3149.062, -101.180] - loss: 2986.751 - mae: 969.064 - mean_q: 1247.022 Interval 3006 (1502500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8848 3 episodes - episode_reward: -371.461 [-536.430, -132.195] - loss: 3177.362 - mae: 958.769 - mean_q: 1231.287 Interval 3007 (1503000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3864 3 episodes - episode_reward: -988.969 [-1507.538, -692.192] - loss: 3025.854 - mae: 920.351 - mean_q: 1176.266 Interval 3008 (1503500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5482 3 episodes - episode_reward: -411.451 [-709.039, -245.504] - loss: 2805.258 - mae: 897.615 - mean_q: 1143.402 Interval 3009 (1504000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3989 3 episodes - episode_reward: -590.547 [-1139.741, -205.703] - loss: 2895.799 - mae: 875.791 - mean_q: 1112.522 Interval 3010 (1504500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8720 2 episodes - episode_reward: -475.150 [-476.761, -473.539] - loss: 2826.942 - mae: 846.552 - mean_q: 
1070.840 Interval 3011 (1505000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6124 4 episodes - episode_reward: -552.665 [-787.606, -316.873] - loss: 3566.803 - mae: 834.563 - mean_q: 1051.911 Interval 3012 (1505500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3642 4 episodes - episode_reward: -569.927 [-863.179, -409.656] - loss: 3159.994 - mae: 818.848 - mean_q: 1030.149 Interval 3013 (1506000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8405 1 episodes - episode_reward: -785.148 [-785.148, -785.148] - loss: 3484.857 - mae: 813.174 - mean_q: 1021.186 Interval 3014 (1506500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4548 2 episodes - episode_reward: -657.018 [-728.832, -585.205] - loss: 3421.531 - mae: 793.266 - mean_q: 992.769 Interval 3015 (1507000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3268 2 episodes - episode_reward: -716.316 [-752.800, -679.832] - loss: 2917.609 - mae: 785.073 - mean_q: 983.332 Interval 3016 (1507500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4469 4 episodes - episode_reward: -618.835 [-933.569, -132.334] - loss: 3355.478 - mae: 785.771 - mean_q: 986.523 Interval 3017 (1508000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4852 3 episodes - episode_reward: -553.073 [-773.658, -186.396] - loss: 4124.284 - mae: 777.753 - mean_q: 974.948 Interval 3018 (1508500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1706 2 episodes - episode_reward: -755.613 [-760.579, -750.647] - loss: 4119.567 - mae: 793.885 - mean_q: 998.136 Interval 3019 (1509000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6158 2 episodes - episode_reward: -758.498 [-823.476, -693.520] - loss: 4114.354 - mae: 802.838 - 
mean_q: 1011.514 Interval 3020 (1509500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6118 5 episodes - episode_reward: -553.735 [-781.923, -400.264] - loss: 3006.780 - mae: 809.749 - mean_q: 1022.930 Interval 3021 (1510000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4978 2 episodes - episode_reward: -603.029 [-698.889, -507.169] - loss: 3323.483 - mae: 816.341 - mean_q: 1031.653 Interval 3022 (1510500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9787 6 episodes - episode_reward: -404.723 [-733.166, -143.240] - loss: 3146.488 - mae: 822.901 - mean_q: 1038.863 Interval 3023 (1511000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7493 6 episodes - episode_reward: -294.672 [-531.253, -169.070] - loss: 3295.270 - mae: 820.535 - mean_q: 1035.544 Interval 3024 (1511500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1591 4 episodes - episode_reward: -699.177 [-859.388, -565.178] - loss: 4120.865 - mae: 834.946 - mean_q: 1054.698 Interval 3025 (1512000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4842 5 episodes - episode_reward: -432.614 [-731.884, -182.161] - loss: 4151.991 - mae: 833.967 - mean_q: 1054.428 Interval 3026 (1512500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.7378 3 episodes - episode_reward: -1100.406 [-1306.984, -806.472] - loss: 3976.194 - mae: 842.433 - mean_q: 1066.487 Interval 3027 (1513000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.1839 6 episodes - episode_reward: -623.681 [-1955.814, -100.000] - loss: 3427.317 - mae: 853.854 - mean_q: 1079.450 Interval 3028 (1513500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.8318 5 episodes - episode_reward: -573.516 [-1270.706, -134.281] - loss: 4184.444 - 
mae: 871.961 - mean_q: 1104.946 Interval 3029 (1514000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.3105 3 episodes - episode_reward: -1236.427 [-1521.210, -939.502] - loss: 4108.493 - mae: 863.864 - mean_q: 1089.959 Interval 3030 (1514500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.2845 7 episodes - episode_reward: -496.044 [-1439.861, -149.366] - loss: 4292.781 - mae: 864.163 - mean_q: 1089.393 Interval 3031 (1515000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.1034 6 episodes - episode_reward: -445.649 [-1196.257, -100.000] - loss: 3873.241 - mae: 857.410 - mean_q: 1079.307 Interval 3032 (1515500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2181 4 episodes - episode_reward: -654.300 [-1479.803, -234.547] - loss: 4070.676 - mae: 850.504 - mean_q: 1067.023 Interval 3033 (1516000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0833 4 episodes - episode_reward: -567.383 [-704.469, -470.204] - loss: 3689.087 - mae: 837.794 - mean_q: 1046.957 Interval 3034 (1516500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7268 3 episodes - episode_reward: -674.463 [-1330.487, -239.499] - loss: 3652.889 - mae: 812.296 - mean_q: 1012.021 Interval 3035 (1517000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.3615 4 episodes - episode_reward: -1077.212 [-2333.739, -364.069] - loss: 3507.173 - mae: 776.150 - mean_q: 964.795 Interval 3036 (1517500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5689 3 episodes - episode_reward: -541.851 [-568.584, -513.378] - loss: 3210.865 - mae: 763.746 - mean_q: 948.531 Interval 3037 (1518000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3806 3 episodes - episode_reward: -768.762 [-914.732, -543.592] - 
loss: 2963.379 - mae: 763.965 - mean_q: 953.326 Interval 3038 (1518500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9344 1 episodes - episode_reward: -814.577 [-814.577, -814.577] - loss: 3188.957 - mae: 766.491 - mean_q: 957.663 Interval 3039 (1519000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3189 4 episodes - episode_reward: -560.766 [-846.814, -100.000] - loss: 3096.220 - mae: 771.057 - mean_q: 964.408 Interval 3040 (1519500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9235 6 episodes - episode_reward: -417.916 [-937.665, -164.033] - loss: 3109.220 - mae: 749.632 - mean_q: 933.676 Interval 3041 (1520000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6980 5 episodes - episode_reward: -476.878 [-938.123, -172.075] - loss: 3209.136 - mae: 769.275 - mean_q: 960.873 Interval 3042 (1520500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3158 2 episodes - episode_reward: -726.174 [-742.119, -710.230] - loss: 3535.228 - mae: 757.610 - mean_q: 945.220 Interval 3043 (1521000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6701 4 episodes - episode_reward: -625.024 [-772.163, -463.256] - loss: 3569.936 - mae: 776.584 - mean_q: 970.163 Interval 3044 (1521500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9429 4 episodes - episode_reward: -613.544 [-1438.381, -100.000] - loss: 3776.096 - mae: 777.260 - mean_q: 972.488 Interval 3045 (1522000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.2347 5 episodes - episode_reward: -442.141 [-830.242, -133.898] - loss: 4304.398 - mae: 800.178 - mean_q: 1004.668 Interval 3046 (1522500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8201 2 episodes - episode_reward: -960.187 [-1099.672, -820.702] 
- loss: 4521.094 - mae: 794.789 - mean_q: 995.534 Interval 3047 (1523000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0940 2 episodes - episode_reward: -701.981 [-888.299, -515.663] - loss: 4502.700 - mae: 817.296 - mean_q: 1026.016 Interval 3048 (1523500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9427 5 episodes - episode_reward: -623.335 [-903.022, -204.107] - loss: 4840.379 - mae: 812.423 - mean_q: 1018.606 Interval 3049 (1524000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4976 4 episodes - episode_reward: -687.883 [-1033.519, -189.314] - loss: 6023.324 - mae: 842.816 - mean_q: 1061.512 Interval 3050 (1524500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0070 3 episodes - episode_reward: -480.800 [-761.538, -307.152] - loss: 6061.890 - mae: 864.259 - mean_q: 1089.880 Interval 3051 (1525000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2795 4 episodes - episode_reward: -674.617 [-900.330, -191.536] - loss: 6763.750 - mae: 901.587 - mean_q: 1138.079 Interval 3052 (1525500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6236 4 episodes - episode_reward: -555.847 [-1057.668, -126.275] - loss: 6271.751 - mae: 914.236 - mean_q: 1157.060 Interval 3053 (1526000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.9591 5 episodes - episode_reward: -675.183 [-897.710, -288.203] - loss: 8155.424 - mae: 971.632 - mean_q: 1237.937 Interval 3054 (1526500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6581 5 episodes - episode_reward: -498.129 [-979.088, -100.000] - loss: 8906.042 - mae: 1012.800 - mean_q: 1291.161 Interval 3055 (1527000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9159 3 episodes - episode_reward: -663.484 [-1092.520, 
-374.517] - loss: 7837.389 - mae: 1047.828 - mean_q: 1338.020 Interval 3056 (1527500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3111 6 episodes - episode_reward: -440.481 [-967.627, -165.938] - loss: 9698.859 - mae: 1070.698 - mean_q: 1365.004 Interval 3057 (1528000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.4118 2 episodes - episode_reward: -1606.648 [-2008.880, -1204.415] - loss: 10563.876 - mae: 1103.351 - mean_q: 1413.256 Interval 3058 (1528500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.2310 4 episodes - episode_reward: -358.394 [-615.492, -142.241] - loss: 10416.929 - mae: 1137.794 - mean_q: 1462.559 Interval 3059 (1529000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6356 4 episodes - episode_reward: -934.198 [-1953.542, -503.515] - loss: 9697.854 - mae: 1166.741 - mean_q: 1502.343 Interval 3060 (1529500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4421 2 episodes - episode_reward: -1487.332 [-1914.641, -1060.023] - loss: 14684.346 - mae: 1218.493 - mean_q: 1577.456 Interval 3061 (1530000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.1921 7 episodes - episode_reward: -513.617 [-1200.189, -100.000] - loss: 13469.742 - mae: 1285.549 - mean_q: 1677.794 Interval 3062 (1530500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3167 5 episodes - episode_reward: -534.792 [-708.336, -379.877] - loss: 13102.694 - mae: 1392.227 - mean_q: 1826.741 Interval 3063 (1531000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2215 5 episodes - episode_reward: -522.251 [-1491.401, -117.467] - loss: 16399.543 - mae: 1480.858 - mean_q: 1946.535 Interval 3064 (1531500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8096 3 episodes - 
episode_reward: -595.174 [-1190.940, -262.167] - loss: 17350.289 - mae: 1492.064 - mean_q: 1953.593 Interval 3065 (1532000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8526 3 episodes - episode_reward: -735.239 [-883.839, -488.955] - loss: 15857.444 - mae: 1584.240 - mean_q: 2087.577 Interval 3066 (1532500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4657 3 episodes - episode_reward: -850.110 [-1138.156, -526.576] - loss: 16243.186 - mae: 1667.476 - mean_q: 2191.742 Interval 3067 (1533000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1809 6 episodes - episode_reward: -347.168 [-637.441, -115.386] - loss: 18971.814 - mae: 1702.824 - mean_q: 2238.240 Interval 3068 (1533500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8949 3 episodes - episode_reward: -398.680 [-478.112, -323.669] - loss: 19430.092 - mae: 1741.971 - mean_q: 2291.952 Interval 3069 (1534000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.9462 4 episodes - episode_reward: -999.684 [-2426.157, -461.188] - loss: 15118.822 - mae: 1839.846 - mean_q: 2429.716 Interval 3070 (1534500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8800 3 episodes - episode_reward: -763.997 [-945.820, -403.238] - loss: 15944.802 - mae: 1916.057 - mean_q: 2525.665 Interval 3071 (1535000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7916 2 episodes - episode_reward: -1141.617 [-1804.396, -478.838] - loss: 25985.229 - mae: 1911.941 - mean_q: 2517.241 Interval 3072 (1535500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7512 2 episodes - episode_reward: -1196.267 [-1828.728, -563.805] - loss: 17509.514 - mae: 1998.522 - mean_q: 2638.449 Interval 3073 (1536000 steps performed) 500/500 [==============================] - 2s 
5ms/step - reward: -2.4000 3 episodes - episode_reward: -342.094 [-454.243, -144.489] - loss: 25518.143 - mae: 2052.833 - mean_q: 2716.347 Interval 3074 (1536500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.7056 5 episodes - episode_reward: -702.502 [-1761.271, -188.294] - loss: 18968.316 - mae: 2088.538 - mean_q: 2765.008 Interval 3075 (1537000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2203 3 episodes - episode_reward: -841.659 [-1560.863, -427.325] - loss: 13883.434 - mae: 2172.268 - mean_q: 2874.813 Interval 3076 (1537500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2567 2 episodes - episode_reward: -1210.715 [-2069.570, -351.860] - loss: 14777.444 - mae: 2231.209 - mean_q: 2954.907 Interval 3077 (1538000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.4624 4 episodes - episode_reward: -1122.300 [-2732.153, -441.668] - loss: 14244.839 - mae: 2242.073 - mean_q: 2968.542 Interval 3078 (1538500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8385 1 episodes - episode_reward: -1756.606 [-1756.606, -1756.606] - loss: 13894.144 - mae: 2242.679 - mean_q: 2967.868 Interval 3079 (1539000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.5539 1 episodes - episode_reward: -1928.216 [-1928.216, -1928.216] - loss: 12443.290 - mae: 2180.722 - mean_q: 2885.129 Interval 3080 (1539500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0851 2 episodes - episode_reward: -1357.471 [-2158.173, -556.770] - loss: 14681.661 - mae: 2177.054 - mean_q: 2883.033 Interval 3081 (1540000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.4469 6 episodes - episode_reward: -631.721 [-2063.014, -115.545] - loss: 16281.525 - mae: 2176.973 - mean_q: 2876.665 Interval 3082 (1540500 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -7.9714 5 episodes - episode_reward: -815.502 [-1611.469, -197.238] - loss: 15638.380 - mae: 2146.665 - mean_q: 2839.629 Interval 3083 (1541000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2244 2 episodes - episode_reward: -755.120 [-1030.213, -480.027] - loss: 12478.010 - mae: 2125.093 - mean_q: 2808.586 Interval 3084 (1541500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8116 4 episodes - episode_reward: -563.161 [-1066.898, -219.019] - loss: 14667.366 - mae: 2075.627 - mean_q: 2738.358 Interval 3085 (1542000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1755 1 episodes - episode_reward: -1981.936 [-1981.936, -1981.936] - loss: 15424.115 - mae: 2074.634 - mean_q: 2742.470 Interval 3086 (1542500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7326 2 episodes - episode_reward: -579.442 [-615.003, -543.880] - loss: 11326.594 - mae: 2072.361 - mean_q: 2743.125 Interval 3087 (1543000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8560 4 episodes - episode_reward: -685.691 [-1280.873, -156.825] - loss: 12772.542 - mae: 2073.386 - mean_q: 2740.637 Interval 3088 (1543500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9140 1 episodes - episode_reward: -1443.360 [-1443.360, -1443.360] - loss: 11558.195 - mae: 2036.075 - mean_q: 2689.400 Interval 3089 (1544000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1763 1 episodes - episode_reward: -1477.726 [-1477.726, -1477.726] - loss: 14816.701 - mae: 2087.049 - mean_q: 2760.469 Interval 3090 (1544500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2848 1 episodes - episode_reward: -1101.459 [-1101.459, -1101.459] - loss: 10471.662 - mae: 2075.854 - mean_q: 
2749.621 Interval 3091 (1545000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4169 1 episodes - episode_reward: -1552.522 [-1552.522, -1552.522] - loss: 10012.073 - mae: 2116.063 - mean_q: 2800.530 Interval 3092 (1545500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0902 2 episodes - episode_reward: -657.702 [-1230.890, -84.514] - loss: 11234.241 - mae: 2072.807 - mean_q: 2744.479 Interval 3093 (1546000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6429 1 episodes - episode_reward: -1260.324 [-1260.324, -1260.324] - loss: 12752.936 - mae: 2111.039 - mean_q: 2796.377 Interval 3094 (1546500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.1624 1 episodes - episode_reward: -3167.132 [-3167.132, -3167.132] - loss: 11591.025 - mae: 2078.557 - mean_q: 2751.814 Interval 3095 (1547000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0851 1 episodes - episode_reward: -1377.584 [-1377.584, -1377.584] - loss: 12819.704 - mae: 2098.383 - mean_q: 2780.290 Interval 3096 (1547500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1252 1 episodes - episode_reward: -610.531 [-610.531, -610.531] - loss: 11214.143 - mae: 2122.246 - mean_q: 2809.802 Interval 3097 (1548000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8339 2 episodes - episode_reward: -760.898 [-1088.355, -433.441] - loss: 9688.283 - mae: 2139.668 - mean_q: 2833.020 Interval 3098 (1548500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3759 1 episodes - episode_reward: -616.881 [-616.881, -616.881] - loss: 11350.229 - mae: 2188.623 - mean_q: 2899.253 Interval 3099 (1549000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9102 3 episodes - episode_reward: -132.729 [-149.519, -119.814] - 
loss: 11348.439 - mae: 2153.611 - mean_q: 2853.882 Interval 3100 (1549500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1199 1 episodes - episode_reward: -1026.086 [-1026.086, -1026.086] - loss: 14063.718 - mae: 2195.281 - mean_q: 2908.698 Interval 3101 (1550000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1809 2 episodes - episode_reward: -404.142 [-445.458, -362.825] - loss: 11454.110 - mae: 2191.062 - mean_q: 2902.231 Interval 3102 (1550500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2124 Interval 3103 (1551000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1688 Interval 3104 (1551500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1462 5 episodes - episode_reward: -135.036 [-237.157, -42.917] - loss: 12240.674 - mae: 2360.329 - mean_q: 3130.774 Interval 3105 (1552000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0833 1 episodes - episode_reward: -162.419 [-162.419, -162.419] - loss: 11941.781 - mae: 2409.645 - mean_q: 3200.015 Interval 3106 (1552500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9540 2 episodes - episode_reward: -191.881 [-364.370, -19.392] - loss: 12442.380 - mae: 2434.151 - mean_q: 3234.938 Interval 3107 (1553000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0420 2 episodes - episode_reward: -23.485 [-29.994, -16.976] - loss: 12668.850 - mae: 2479.800 - mean_q: 3296.704 Interval 3108 (1553500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6990 2 episodes - episode_reward: -157.857 [-167.415, -148.299] - loss: 13207.598 - mae: 2523.409 - mean_q: 3360.415 Interval 3109 (1554000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8165 4 episodes - episode_reward: -101.206 
[-142.592, -50.958] - loss: 13622.734 - mae: 2521.632 - mean_q: 3354.625 Interval 3110 (1554500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1930 7 episodes - episode_reward: -145.864 [-234.816, -51.884] - loss: 13241.713 - mae: 2525.281 - mean_q: 3363.326 Interval 3111 (1555000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5071 4 episodes - episode_reward: -222.458 [-339.700, -138.898] - loss: 15170.685 - mae: 2554.365 - mean_q: 3405.330 Interval 3112 (1555500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1682 6 episodes - episode_reward: -154.733 [-308.070, -54.003] - loss: 15917.800 - mae: 2614.121 - mean_q: 3494.828 Interval 3113 (1556000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7614 6 episodes - episode_reward: -222.334 [-285.658, -99.008] - loss: 18350.904 - mae: 2666.423 - mean_q: 3576.116 Interval 3114 (1556500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6820 6 episodes - episode_reward: -253.957 [-414.382, -168.268] - loss: 19471.922 - mae: 2781.815 - mean_q: 3740.627 Interval 3115 (1557000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1028 5 episodes - episode_reward: -199.712 [-372.744, 4.336] - loss: 26774.062 - mae: 2915.070 - mean_q: 3947.164 Interval 3116 (1557500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8034 7 episodes - episode_reward: -195.020 [-305.994, 45.511] - loss: 42093.727 - mae: 3174.365 - mean_q: 4317.960 Interval 3117 (1558000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2071 7 episodes - episode_reward: -230.990 [-377.556, -137.512] - loss: 142277.688 - mae: 3406.959 - mean_q: 4607.120 Interval 3118 (1558500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3106 5 episodes - 
episode_reward: -216.667 [-466.347, -21.975] - loss: 84680.695 - mae: 3630.269 - mean_q: 4911.206 Interval 3119 (1559000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0971 6 episodes - episode_reward: -191.388 [-310.285, -95.994] - loss: 178277.141 - mae: 3910.023 - mean_q: 5299.487 Interval 3120 (1559500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9997 5 episodes - episode_reward: -182.191 [-243.532, -117.595] - loss: 268977.344 - mae: 4487.278 - mean_q: 6119.419 Interval 3121 (1560000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3417 6 episodes - episode_reward: -233.319 [-362.244, -151.028] - loss: 539001.062 - mae: 5043.074 - mean_q: 6817.670 Interval 3122 (1560500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.0786 8 episodes - episode_reward: -439.699 [-945.204, -103.854] - loss: 552216.375 - mae: 5206.376 - mean_q: 6977.286 Interval 3123 (1561000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.9657 8 episodes - episode_reward: -510.793 [-1031.992, -100.000] - loss: 348464.781 - mae: 5248.383 - mean_q: 7058.343 Interval 3124 (1561500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.5997 9 episodes - episode_reward: -477.018 [-847.958, -102.694] - loss: 313119.125 - mae: 5351.065 - mean_q: 7186.920 Interval 3125 (1562000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -9.2424 7 episodes - episode_reward: -649.230 [-1124.964, -246.852] - loss: 311444.562 - mae: 5382.361 - mean_q: 7227.585 Interval 3126 (1562500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.3068 8 episodes - episode_reward: -524.756 [-829.242, -84.273] - loss: 176840.453 - mae: 5416.903 - mean_q: 7298.574 Interval 3127 (1563000 steps performed) 500/500 [==============================] - 2s 
5ms/step - reward: -8.7223 9 episodes - episode_reward: -518.252 [-907.147, -100.000] - loss: 289377.062 - mae: 5412.313 - mean_q: 7276.500 Interval 3128 (1563500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -7.4624 8 episodes - episode_reward: -410.268 [-874.358, -96.349] - loss: 273591.500 - mae: 5354.643 - mean_q: 7212.642 Interval 3129 (1564000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.5464 8 episodes - episode_reward: -555.223 [-1024.775, -169.812] - loss: 100541.539 - mae: 5342.958 - mean_q: 7208.364 Interval 3130 (1564500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.7868 7 episodes - episode_reward: -570.075 [-912.115, -242.388] - loss: 141398.984 - mae: 5242.432 - mean_q: 7051.958 Interval 3131 (1565000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.0316 8 episodes - episode_reward: -527.458 [-877.218, -128.100] - loss: 210269.391 - mae: 5079.979 - mean_q: 6827.320 Interval 3132 (1565500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.9000 6 episodes - episode_reward: -531.964 [-1014.399, -125.671] - loss: 104061.578 - mae: 4932.750 - mean_q: 6613.662 Interval 3133 (1566000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.6695 6 episodes - episode_reward: -516.039 [-1305.174, -100.592] - loss: 142840.875 - mae: 4676.813 - mean_q: 6259.773 Interval 3134 (1566500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.5996 5 episodes - episode_reward: -945.551 [-1353.129, -367.473] - loss: 59033.047 - mae: 4537.536 - mean_q: 6078.600 Interval 3135 (1567000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8909 4 episodes - episode_reward: -427.506 [-600.246, -230.179] - loss: 50895.473 - mae: 4320.067 - mean_q: 5780.377 Interval 3136 (1567500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -5.1557 7 episodes - episode_reward: -480.745 [-1076.444, -131.000] - loss: 158421.891 - mae: 4093.476 - mean_q: 5466.880 Interval 3137 (1568000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4113 5 episodes - episode_reward: -402.272 [-564.863, -256.061] - loss: 43897.730 - mae: 3906.746 - mean_q: 5229.126 Interval 3138 (1568500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3397 7 episodes - episode_reward: -326.501 [-561.655, -131.630] - loss: 31387.943 - mae: 3691.862 - mean_q: 4944.254 Interval 3139 (1569000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4052 6 episodes - episode_reward: -290.058 [-421.556, -136.472] - loss: 45945.105 - mae: 3597.379 - mean_q: 4819.312 Interval 3140 (1569500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0742 4 episodes - episode_reward: -470.712 [-976.773, -160.438] - loss: 47700.656 - mae: 3526.285 - mean_q: 4741.152 Interval 3141 (1570000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1345 5 episodes - episode_reward: -284.282 [-447.764, -133.234] - loss: 51086.375 - mae: 3563.011 - mean_q: 4801.933 Interval 3142 (1570500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5497 5 episodes - episode_reward: -422.331 [-580.500, -310.576] - loss: 43549.109 - mae: 3645.176 - mean_q: 4922.348 Interval 3143 (1571000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7661 6 episodes - episode_reward: -311.539 [-531.321, -155.954] - loss: 47690.676 - mae: 3805.283 - mean_q: 5147.044 Interval 3144 (1571500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8281 4 episodes - episode_reward: -468.130 [-565.208, -338.537] - loss: 126815.641 - mae: 3891.985 - mean_q: 5260.846 Interval 3145 
(1572000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5066 5 episodes - episode_reward: -250.360 [-510.201, -105.618] - loss: 62624.555 - mae: 4074.971 - mean_q: 5530.814 Interval 3146 (1572500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6520 7 episodes - episode_reward: -267.521 [-527.209, -100.000] - loss: 126845.734 - mae: 4292.345 - mean_q: 5834.351 Interval 3147 (1573000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6617 4 episodes - episode_reward: -452.337 [-541.988, -271.822] - loss: 85042.742 - mae: 4495.579 - mean_q: 6100.683 Interval 3148 (1573500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8524 3 episodes - episode_reward: -472.827 [-612.927, -212.648] - loss: 171788.172 - mae: 4731.665 - mean_q: 6414.777 Interval 3149 (1574000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0836 5 episodes - episode_reward: -398.124 [-570.060, -100.000] - loss: 203604.984 - mae: 5060.848 - mean_q: 6866.312 Interval 3150 (1574500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8876 4 episodes - episode_reward: -470.136 [-536.402, -367.980] - loss: 135501.422 - mae: 5340.918 - mean_q: 7274.083 Interval 3151 (1575000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5818 3 episodes - episode_reward: -472.217 [-702.602, -242.373] - loss: 301765.219 - mae: 5707.951 - mean_q: 7776.165 Interval 3152 (1575500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2363 4 episodes - episode_reward: -539.698 [-783.103, -240.410] - loss: 171163.234 - mae: 5928.500 - mean_q: 8093.361 Interval 3153 (1576000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7080 2 episodes - episode_reward: -846.094 [-923.256, -768.932] - loss: 187958.469 - mae: 6327.571 
- mean_q: 8650.274 Interval 3154 (1576500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1727 4 episodes - episode_reward: -518.031 [-1018.936, -158.955] - loss: 201073.203 - mae: 6398.454 - mean_q: 8748.027 Interval 3155 (1577000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.1475 2 episodes - episode_reward: -1748.933 [-2612.311, -885.555] - loss: 207373.344 - mae: 6711.254 - mean_q: 9171.544 Interval 3156 (1577500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2770 4 episodes - episode_reward: -564.243 [-1140.046, -353.655] - loss: 135869.078 - mae: 6826.330 - mean_q: 9330.167 Interval 3157 (1578000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0627 3 episodes - episode_reward: -755.919 [-1402.891, -348.328] - loss: 169600.953 - mae: 6914.024 - mean_q: 9426.464 Interval 3158 (1578500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.1927 2 episodes - episode_reward: -2140.273 [-4028.234, -252.312] - loss: 191344.609 - mae: 6995.005 - mean_q: 9523.146 Interval 3159 (1579000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.2497 2 episodes - episode_reward: -1901.051 [-2799.586, -1002.515] - loss: 190999.219 - mae: 7095.814 - mean_q: 9650.319 Interval 3160 (1579500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4858 2 episodes - episode_reward: -799.074 [-1323.866, -274.282] - loss: 149003.734 - mae: 7067.136 - mean_q: 9601.706 Interval 3161 (1580000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7813 2 episodes - episode_reward: -1461.866 [-2142.206, -781.527] - loss: 141588.891 - mae: 7040.449 - mean_q: 9554.919 Interval 3162 (1580500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.0592 2 episodes - episode_reward: -1904.483 
[-3366.747, -442.218] - loss: 135979.938 - mae: 7151.255 - mean_q: 9703.454 Interval 3163 (1581000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.5174 1 episodes - episode_reward: -2569.923 [-2569.923, -2569.923] - loss: 145516.609 - mae: 7150.170 - mean_q: 9680.865 Interval 3164 (1581500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5383 3 episodes - episode_reward: -983.343 [-1470.806, -188.452] - loss: 128344.992 - mae: 7209.590 - mean_q: 9743.996 Interval 3165 (1582000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7071 2 episodes - episode_reward: -737.835 [-1360.642, -115.029] - loss: 157666.594 - mae: 7059.044 - mean_q: 9502.899 Interval 3166 (1582500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4453 1 episodes - episode_reward: -1111.987 [-1111.987, -1111.987] - loss: 122914.023 - mae: 6894.821 - mean_q: 9277.963 Interval 3167 (1583000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1965 3 episodes - episode_reward: -526.310 [-1083.294, -118.450] - loss: 124010.914 - mae: 6943.029 - mean_q: 9331.766 Interval 3168 (1583500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4419 3 episodes - episode_reward: -537.238 [-598.596, -432.391] - loss: 128822.969 - mae: 6808.362 - mean_q: 9145.254 Interval 3169 (1584000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8117 2 episodes - episode_reward: -365.798 [-403.409, -328.187] - loss: 148264.219 - mae: 6738.093 - mean_q: 9047.742 Interval 3170 (1584500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5019 3 episodes - episode_reward: -300.772 [-502.280, -109.342] - loss: 125225.422 - mae: 6651.558 - mean_q: 8931.709 Interval 3171 (1585000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 
-2.4315 2 episodes - episode_reward: -439.162 [-555.535, -322.789] - loss: 166385.719 - mae: 6476.431 - mean_q: 8701.035 Interval 3172 (1585500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6010 2 episodes - episode_reward: -459.173 [-463.390, -454.955] - loss: 135460.703 - mae: 6540.392 - mean_q: 8793.410 Interval 3173 (1586000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6061 3 episodes - episode_reward: -444.644 [-607.349, -187.803] - loss: 144428.062 - mae: 6502.916 - mean_q: 8743.651 Interval 3174 (1586500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3123 4 episodes - episode_reward: -341.925 [-452.948, -128.221] - loss: 149753.312 - mae: 6616.616 - mean_q: 8901.393 Interval 3175 (1587000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3491 1 episodes - episode_reward: -524.744 [-524.744, -524.744] - loss: 167458.219 - mae: 6631.224 - mean_q: 8915.808 Interval 3176 (1587500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6058 5 episodes - episode_reward: -286.636 [-615.957, -115.810] - loss: 168972.094 - mae: 6614.506 - mean_q: 8888.471 Interval 3177 (1588000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6512 4 episodes - episode_reward: -337.647 [-750.553, -66.324] - loss: 166106.016 - mae: 6665.041 - mean_q: 8963.524 Interval 3178 (1588500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0410 2 episodes - episode_reward: -357.248 [-385.277, -329.219] - loss: 181383.016 - mae: 6622.646 - mean_q: 8906.042 Interval 3179 (1589000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9352 7 episodes - episode_reward: -253.521 [-621.837, -110.022] - loss: 145276.562 - mae: 6719.716 - mean_q: 9039.068 Interval 3180 (1589500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -0.7270 1 episodes - episode_reward: -254.687 [-254.687, -254.687] - loss: 251004.359 - mae: 6738.773 - mean_q: 9051.515 Interval 3181 (1590000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6786 2 episodes - episode_reward: -472.090 [-750.032, -194.149] - loss: 223596.219 - mae: 6845.330 - mean_q: 9217.776 Interval 3182 (1590500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3270 2 episodes - episode_reward: -578.760 [-679.300, -478.220] - loss: 181728.672 - mae: 6965.730 - mean_q: 9387.143 Interval 3183 (1591000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9099 2 episodes - episode_reward: -247.713 [-403.688, -91.738] - loss: 254491.781 - mae: 7231.854 - mean_q: 9746.336 Interval 3184 (1591500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9077 4 episodes - episode_reward: -229.326 [-416.494, -100.080] - loss: 214976.391 - mae: 7135.073 - mean_q: 9621.113 Interval 3185 (1592000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2913 2 episodes - episode_reward: -402.523 [-621.130, -183.916] - loss: 253816.969 - mae: 7257.139 - mean_q: 9790.994 Interval 3186 (1592500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6635 4 episodes - episode_reward: -417.760 [-693.191, -125.801] - loss: 232774.484 - mae: 7323.180 - mean_q: 9895.089 Interval 3187 (1593000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3023 2 episodes - episode_reward: -183.989 [-205.488, -162.489] - loss: 248520.516 - mae: 7618.664 - mean_q: 10302.780 Interval 3188 (1593500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8112 5 episodes - episode_reward: -432.217 [-576.874, -276.089] - loss: 239580.016 - mae: 7714.085 - mean_q: 10436.675 Interval 
3189 (1594000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0286 4 episodes - episode_reward: -352.402 [-557.027, -155.111] - loss: 236949.406 - mae: 8072.795 - mean_q: 10895.815 Interval 3190 (1594500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4816 4 episodes - episode_reward: -591.612 [-773.197, -413.548] - loss: 262485.031 - mae: 8033.639 - mean_q: 10807.428 Interval 3191 (1595000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3065 5 episodes - episode_reward: -326.253 [-533.363, -100.000] - loss: 208997.453 - mae: 8192.189 - mean_q: 11000.889 Interval 3192 (1595500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8501 3 episodes - episode_reward: -596.258 [-664.010, -470.819] - loss: 302333.156 - mae: 8131.923 - mean_q: 10889.858 Interval 3193 (1596000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2986 4 episodes - episode_reward: -470.059 [-583.298, -406.754] - loss: 271005.844 - mae: 7898.280 - mean_q: 10553.639 Interval 3194 (1596500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0967 4 episodes - episode_reward: -366.398 [-608.865, -132.457] - loss: 340114.219 - mae: 7405.218 - mean_q: 9886.046 Interval 3195 (1597000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7635 5 episodes - episode_reward: -321.823 [-488.496, -110.525] - loss: 234638.672 - mae: 7243.151 - mean_q: 9674.587 Interval 3196 (1597500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3352 3 episodes - episode_reward: -387.601 [-667.365, -100.000] - loss: 240119.594 - mae: 7128.080 - mean_q: 9518.876 Interval 3197 (1598000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6392 5 episodes - episode_reward: -421.398 [-687.929, -217.530] - loss: 322753.062 - 
mae: 6853.929 - mean_q: 9149.774 Interval 3198 (1598500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6076 3 episodes - episode_reward: -416.532 [-669.586, -200.900] - loss: 233317.984 - mae: 6770.180 - mean_q: 9038.226 Interval 3199 (1599000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7114 4 episodes - episode_reward: -367.227 [-466.619, -252.457] - loss: 309547.750 - mae: 6675.978 - mean_q: 8909.355 Interval 3200 (1599500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7439 3 episodes - episode_reward: -450.300 [-608.932, -223.624] - loss: 281873.531 - mae: 6382.191 - mean_q: 8517.363 Interval 3201 (1600000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8572 5 episodes - episode_reward: -375.787 [-698.418, -173.546] - loss: 289127.125 - mae: 6445.874 - mean_q: 8608.858 Interval 3202 (1600500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0807 3 episodes - episode_reward: -493.549 [-822.746, -312.281] - loss: 382622.125 - mae: 6425.722 - mean_q: 8586.970 Interval 3203 (1601000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1421 3 episodes - episode_reward: -488.037 [-672.349, -251.449] - loss: 294279.031 - mae: 6297.723 - mean_q: 8425.933 Interval 3204 (1601500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3261 4 episodes - episode_reward: -501.763 [-756.055, -128.565] - loss: 261498.047 - mae: 6185.199 - mean_q: 8294.132 Interval 3205 (1602000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7632 1 episodes - episode_reward: -577.224 [-577.224, -577.224] - loss: 321722.750 - mae: 6206.674 - mean_q: 8326.258 Interval 3206 (1602500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5448 7 episodes - episode_reward: -402.071 
[-740.924, -100.000] - loss: 224696.984 - mae: 6234.971 - mean_q: 8356.773 Interval 3207 (1603000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4720 4 episodes - episode_reward: -556.101 [-720.265, -346.210] - loss: 363700.781 - mae: 6207.921 - mean_q: 8319.567 Interval 3208 (1603500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9047 5 episodes - episode_reward: -317.575 [-565.636, -126.450] - loss: 276178.812 - mae: 6344.382 - mean_q: 8504.928 Interval 3209 (1604000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3323 5 episodes - episode_reward: -234.956 [-395.793, -100.476] - loss: 268422.875 - mae: 6344.854 - mean_q: 8500.631 Interval 3210 (1604500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0954 5 episodes - episode_reward: -372.655 [-551.384, -200.070] - loss: 244378.312 - mae: 6318.147 - mean_q: 8446.889 Interval 3211 (1605000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6332 5 episodes - episode_reward: -377.729 [-574.887, -123.313] - loss: 331592.188 - mae: 6259.117 - mean_q: 8361.479 Interval 3212 (1605500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8140 5 episodes - episode_reward: -296.000 [-552.709, -134.693] - loss: 499073.344 - mae: 6317.366 - mean_q: 8441.247 Interval 3213 (1606000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6444 5 episodes - episode_reward: -351.597 [-626.492, -155.905] - loss: 299203.594 - mae: 6134.859 - mean_q: 8213.209 Interval 3214 (1606500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9158 5 episodes - episode_reward: -494.927 [-1066.956, -180.043] - loss: 362417.625 - mae: 6229.175 - mean_q: 8323.412 Interval 3215 (1607000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1100 3 
episodes - episode_reward: -377.559 [-489.395, -221.623] - loss: 296309.906 - mae: 6161.887 - mean_q: 8236.048 Interval 3216 (1607500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5149 5 episodes - episode_reward: -347.838 [-588.298, -94.710] - loss: 270348.062 - mae: 6253.369 - mean_q: 8352.875 Interval 3217 (1608000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2767 5 episodes - episode_reward: -249.768 [-390.911, -99.926] - loss: 365488.719 - mae: 6110.431 - mean_q: 8162.346 Interval 3218 (1608500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8451 3 episodes - episode_reward: -568.136 [-905.322, -267.328] - loss: 358431.031 - mae: 6197.488 - mean_q: 8290.476 Interval 3219 (1609000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1774 6 episodes - episode_reward: -361.805 [-570.644, -191.440] - loss: 350958.188 - mae: 6324.811 - mean_q: 8468.890 Interval 3220 (1609500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1118 6 episodes - episode_reward: -308.628 [-614.414, -111.267] - loss: 385589.750 - mae: 6288.217 - mean_q: 8411.792 Interval 3221 (1610000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5448 6 episodes - episode_reward: -330.759 [-967.039, -3.144] - loss: 504898.406 - mae: 6437.082 - mean_q: 8609.615 Interval 3222 (1610500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5464 4 episodes - episode_reward: -312.904 [-681.758, -160.339] - loss: 325400.875 - mae: 6599.391 - mean_q: 8838.018 Interval 3223 (1611000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7129 2 episodes - episode_reward: -682.337 [-868.833, -495.842] - loss: 456070.969 - mae: 6790.565 - mean_q: 9096.772 Interval 3224 (1611500 steps performed) 500/500 [==============================] - 
2s 4ms/step - reward: -2.6414 2 episodes - episode_reward: -262.120 [-276.662, -247.578] - loss: 302248.375 - mae: 6946.538 - mean_q: 9300.502 Interval 3225 (1612000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7670 4 episodes - episode_reward: -917.797 [-2679.063, -178.236] - loss: 1698388.500 - mae: 7443.743 - mean_q: 9974.547 Interval 3226 (1612500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0135 3 episodes - episode_reward: -273.332 [-465.576, -110.103] - loss: 459787.906 - mae: 7680.044 - mean_q: 10333.266 Interval 3227 (1613000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0238 3 episodes - episode_reward: -410.245 [-436.120, -376.859] - loss: 456835.406 - mae: 7924.830 - mean_q: 10643.551 Interval 3228 (1613500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6567 2 episodes - episode_reward: -415.241 [-609.328, -221.155] - loss: 1108072.875 - mae: 8320.124 - mean_q: 11173.690 Interval 3229 (1614000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5448 3 episodes - episode_reward: -226.661 [-296.389, -107.426] - loss: 668487.938 - mae: 8465.040 - mean_q: 11389.927 Interval 3230 (1614500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4375 3 episodes - episode_reward: -237.117 [-315.702, -136.000] - loss: 417195.125 - mae: 8959.471 - mean_q: 12076.811 Interval 3231 (1615000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4961 3 episodes - episode_reward: -401.368 [-570.590, -280.307] - loss: 429691.688 - mae: 9121.026 - mean_q: 12317.515 Interval 3232 (1615500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.1434 1 episodes - episode_reward: -248.382 [-248.382, -248.382] - loss: 475144.438 - mae: 9730.377 - mean_q: 13169.312 Interval 3233 (1616000 steps 
performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7148 2 episodes - episode_reward: -417.639 [-525.273, -310.006] - loss: 557748.562 - mae: 10517.055 - mean_q: 14261.913 Interval 3234 (1616500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4877 Interval 3235 (1617000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2153 3 episodes - episode_reward: -698.751 [-1511.057, -173.080] - loss: 709307.188 - mae: 11561.377 - mean_q: 15725.242 Interval 3236 (1617500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1759 3 episodes - episode_reward: -242.816 [-357.344, -111.885] - loss: 653436.312 - mae: 12411.716 - mean_q: 16913.678 Interval 3237 (1618000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5052 4 episodes - episode_reward: -209.838 [-297.686, -171.308] - loss: 656206.312 - mae: 12971.056 - mean_q: 17685.635 Interval 3238 (1618500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5064 3 episodes - episode_reward: -420.520 [-580.045, -168.463] - loss: 890291.562 - mae: 13713.723 - mean_q: 18708.432 Interval 3239 (1619000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9612 2 episodes - episode_reward: -460.635 [-470.903, -450.367] - loss: 784457.625 - mae: 14610.301 - mean_q: 19957.893 Interval 3240 (1619500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0287 3 episodes - episode_reward: -358.743 [-479.885, -214.571] - loss: 723882.438 - mae: 15217.036 - mean_q: 20824.633 Interval 3241 (1620000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8390 1 episodes - episode_reward: -498.609 [-498.609, -498.609] - loss: 826510.938 - mae: 16149.576 - mean_q: 22119.100 Interval 3242 (1620500 steps performed) 500/500 [==============================] - 2s 
4ms/step - reward: -2.7434 3 episodes - episode_reward: -417.650 [-626.194, -122.240] - loss: 954508.375 - mae: 16545.645 - mean_q: 22701.281 Interval 3243 (1621000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6079 2 episodes - episode_reward: -380.580 [-533.838, -227.322] - loss: 1049602.125 - mae: 17533.723 - mean_q: 24033.588 Interval 3244 (1621500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1972 2 episodes - episode_reward: -167.847 [-203.006, -132.688] - loss: 860868.312 - mae: 17882.832 - mean_q: 24546.818 Interval 3245 (1622000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9035 1 episodes - episode_reward: -666.805 [-666.805, -666.805] - loss: 838277.688 - mae: 18224.928 - mean_q: 25015.604 Interval 3246 (1622500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5356 1 episodes - episode_reward: -1494.585 [-1494.585, -1494.585] - loss: 1120857.000 - mae: 18598.879 - mean_q: 25520.119 Interval 3247 (1623000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8025 1 episodes - episode_reward: -1098.445 [-1098.445, -1098.445] - loss: 1018077.875 - mae: 18961.258 - mean_q: 25991.611 Interval 3248 (1623500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3557 2 episodes - episode_reward: -1151.175 [-1739.727, -562.623] - loss: 713210.000 - mae: 19023.695 - mean_q: 26066.586 Interval 3249 (1624000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0581 2 episodes - episode_reward: -808.011 [-1288.238, -327.783] - loss: 564744.875 - mae: 19140.156 - mean_q: 26192.715 Interval 3250 (1624500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8597 Interval 3251 (1625000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2455 4 episodes - 
episode_reward: -633.275 [-1268.448, -100.848] - loss: 650803.812 - mae: 18829.846 - mean_q: 25703.691 Interval 3252 (1625500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4579 2 episodes - episode_reward: -690.866 [-1119.881, -261.851] - loss: 731101.938 - mae: 19082.092 - mean_q: 26010.363 Interval 3253 (1626000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9792 2 episodes - episode_reward: -654.073 [-1119.475, -188.671] - loss: 637072.688 - mae: 18923.090 - mean_q: 25745.115 Interval 3254 (1626500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0558 2 episodes - episode_reward: -465.511 [-803.484, -127.538] - loss: 698058.062 - mae: 18542.527 - mean_q: 25210.658 Interval 3255 (1627000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3931 1 episodes - episode_reward: -2044.020 [-2044.020, -2044.020] - loss: 609819.688 - mae: 18351.654 - mean_q: 24924.805 Interval 3256 (1627500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7852 5 episodes - episode_reward: -840.630 [-3161.629, -125.361] - loss: 528840.375 - mae: 18611.014 - mean_q: 25259.297 Interval 3257 (1628000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5722 2 episodes - episode_reward: -690.150 [-1107.393, -272.907] - loss: 473533.625 - mae: 18198.082 - mean_q: 24668.244 Interval 3258 (1628500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6873 1 episodes - episode_reward: -1171.065 [-1171.065, -1171.065] - loss: 643547.188 - mae: 18111.582 - mean_q: 24520.826 Interval 3259 (1629000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7926 4 episodes - episode_reward: -560.256 [-1183.287, -218.766] - loss: 536066.875 - mae: 17863.570 - mean_q: 24153.592 Interval 3260 (1629500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.8775 2 episodes - episode_reward: -564.530 [-602.075, -526.984] - loss: 473102.750 - mae: 17676.178 - mean_q: 23880.438 Interval 3261 (1630000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6544 5 episodes - episode_reward: -536.887 [-1809.597, -168.246] - loss: 531425.562 - mae: 17254.885 - mean_q: 23301.428 Interval 3262 (1630500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6197 3 episodes - episode_reward: -342.929 [-514.635, -132.672] - loss: 445103.500 - mae: 16522.617 - mean_q: 22278.904 Interval 3263 (1631000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7853 1 episodes - episode_reward: -1136.805 [-1136.805, -1136.805] - loss: 377370.844 - mae: 16395.758 - mean_q: 22091.414 Interval 3264 (1631500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6777 3 episodes - episode_reward: -284.197 [-407.063, -100.000] - loss: 500717.688 - mae: 15998.825 - mean_q: 21544.141 Interval 3265 (1632000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7362 4 episodes - episode_reward: -382.929 [-575.559, -192.452] - loss: 426830.812 - mae: 15677.401 - mean_q: 21115.219 Interval 3266 (1632500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6676 1 episodes - episode_reward: -747.026 [-747.026, -747.026] - loss: 307247.781 - mae: 15239.737 - mean_q: 20537.711 Interval 3267 (1633000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8938 2 episodes - episode_reward: -531.842 [-602.138, -461.545] - loss: 387634.375 - mae: 14810.512 - mean_q: 19958.584 Interval 3268 (1633500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6651 1 episodes - episode_reward: -632.936 [-632.936, -632.936] - loss: 393875.312 - mae: 14435.956 - mean_q: 
19451.496 Interval 3269 (1634000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5040 5 episodes - episode_reward: -295.163 [-642.639, -100.000] - loss: 361123.531 - mae: 13996.619 - mean_q: 18862.633 Interval 3270 (1634500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8609 1 episodes - episode_reward: -669.813 [-669.813, -669.813] - loss: 305666.594 - mae: 13719.397 - mean_q: 18512.787 Interval 3271 (1635000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8134 3 episodes - episode_reward: -463.925 [-554.738, -311.647] - loss: 331818.531 - mae: 13483.485 - mean_q: 18221.916 Interval 3272 (1635500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0777 2 episodes - episode_reward: -663.031 [-710.522, -615.539] - loss: 462049.094 - mae: 13278.464 - mean_q: 17949.131 Interval 3273 (1636000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2821 1 episodes - episode_reward: -568.535 [-568.535, -568.535] - loss: 375073.125 - mae: 13291.402 - mean_q: 17990.613 Interval 3274 (1636500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7937 1 episodes - episode_reward: -1121.958 [-1121.958, -1121.958] - loss: 315043.281 - mae: 13397.094 - mean_q: 18156.066 Interval 3275 (1637000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6107 3 episodes - episode_reward: -601.315 [-652.277, -542.329] - loss: 409452.750 - mae: 13454.088 - mean_q: 18223.471 Interval 3276 (1637500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1405 1 episodes - episode_reward: -1278.738 [-1278.738, -1278.738] - loss: 349227.469 - mae: 13390.956 - mean_q: 18143.277 Interval 3277 (1638000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9909 1 episodes - episode_reward: -1588.573 
[-1588.573, -1588.573] - loss: 406570.438 - mae: 13609.054 - mean_q: 18429.961 Interval 3278 (1638500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6057 Interval 3279 (1639000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5762 4 episodes - episode_reward: -881.754 [-2803.833, -96.131] - loss: 448072.938 - mae: 13338.000 - mean_q: 18034.689 Interval 3280 (1639500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2413 1 episodes - episode_reward: -1283.532 [-1283.532, -1283.532] - loss: 364934.844 - mae: 13435.543 - mean_q: 18160.049 Interval 3281 (1640000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1295 1 episodes - episode_reward: -1317.085 [-1317.085, -1317.085] - loss: 363692.875 - mae: 13158.632 - mean_q: 17772.982 Interval 3282 (1640500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0552 2 episodes - episode_reward: -499.832 [-837.423, -162.242] - loss: 386175.938 - mae: 13025.065 - mean_q: 17592.170 Interval 3283 (1641000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.3179 2 episodes - episode_reward: -2447.229 [-4673.014, -221.444] - loss: 370664.594 - mae: 12897.677 - mean_q: 17417.766 Interval 3284 (1641500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.7256 1 episodes - episode_reward: -167.426 [-167.426, -167.426] - loss: 354659.844 - mae: 12912.554 - mean_q: 17435.932 Interval 3285 (1642000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -8.5926 Interval 3286 (1642500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3119 4 episodes - episode_reward: -1525.050 [-5719.672, -100.989] - loss: 352593.094 - mae: 12463.505 - mean_q: 16810.080 Interval 3287 (1643000 steps performed) 500/500 [==============================] - 2s 
4ms/step - reward: -1.7830 2 episodes - episode_reward: -385.452 [-661.353, -109.552] - loss: 277916.875 - mae: 12288.563 - mean_q: 16578.646 Interval 3288 (1643500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0846 Interval 3289 (1644000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5105 2 episodes - episode_reward: -165.502 [-231.004, -100.000] - loss: 337021.188 - mae: 12078.872 - mean_q: 16280.314 Interval 3290 (1644500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3957 1 episodes - episode_reward: -634.525 [-634.525, -634.525] - loss: 292394.594 - mae: 11797.068 - mean_q: 15889.220 Interval 3291 (1645000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9892 2 episodes - episode_reward: -311.246 [-440.356, -182.137] - loss: 282989.469 - mae: 11485.995 - mean_q: 15474.548 Interval 3292 (1645500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3470 2 episodes - episode_reward: -1018.463 [-1821.777, -215.149] - loss: 279436.250 - mae: 11046.403 - mean_q: 14877.691 Interval 3293 (1646000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.1898 1 episodes - episode_reward: -2061.247 [-2061.247, -2061.247] - loss: 268445.875 - mae: 10956.646 - mean_q: 14759.887 Interval 3294 (1646500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.8061 Interval 3295 (1647000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3927 4 episodes - episode_reward: -1428.495 [-3662.801, -100.000] - loss: 330660.125 - mae: 10552.517 - mean_q: 14207.554 Interval 3296 (1647500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3118 3 episodes - episode_reward: -692.685 [-1609.879, -109.954] - loss: 245191.219 - mae: 10313.241 - mean_q: 13890.602 Interval 3297 (1648000 steps 
performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4470 1 episodes - episode_reward: -2262.892 [-2262.892, -2262.892] - loss: 211727.328 - mae: 10050.989 - mean_q: 13542.755 Interval 3298 (1648500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8530 Interval 3299 (1649000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.0837 1 episodes - episode_reward: -3436.145 [-3436.145, -3436.145] - loss: 223495.297 - mae: 9765.509 - mean_q: 13164.930 Interval 3300 (1649500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6463 2 episodes - episode_reward: -828.735 [-1370.910, -286.560] - loss: 239927.344 - mae: 9614.179 - mean_q: 12951.637 Interval 3301 (1650000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5800 3 episodes - episode_reward: -834.678 [-1372.727, -100.000] - loss: 224027.500 - mae: 9435.369 - mean_q: 12714.064 Interval 3302 (1650500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6687 1 episodes - episode_reward: -1114.801 [-1114.801, -1114.801] - loss: 217968.188 - mae: 9397.168 - mean_q: 12666.535 Interval 3303 (1651000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5566 2 episodes - episode_reward: -711.450 [-964.171, -458.729] - loss: 220764.234 - mae: 9134.964 - mean_q: 12313.205 Interval 3304 (1651500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9879 Interval 3305 (1652000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1111 2 episodes - episode_reward: -2088.393 [-4015.008, -161.778] - loss: 195326.984 - mae: 9143.607 - mean_q: 12320.810 Interval 3306 (1652500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3101 Interval 3307 (1653000 steps performed) 500/500 [==============================] - 3s 
6ms/step - reward: -1.7373 1 episodes - episode_reward: -1503.689 [-1503.689, -1503.689] - loss: 210964.188 - mae: 9071.081 - mean_q: 12226.616 Interval 3308 (1653500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4159 Interval 3309 (1654000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3952 1 episodes - episode_reward: -1254.885 [-1254.885, -1254.885] - loss: 183283.703 - mae: 9132.203 - mean_q: 12304.796 Interval 3310 (1654500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.4001 Interval 3311 (1655000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4764 4 episodes - episode_reward: -280.863 [-604.212, -119.210] - loss: 216692.125 - mae: 8882.165 - mean_q: 11941.040 Interval 3312 (1655500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5105 Interval 3313 (1656000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2599 Interval 3314 (1656500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4820 1 episodes - episode_reward: -566.179 [-566.179, -566.179] - loss: 183806.297 - mae: 8954.819 - mean_q: 12043.056 Interval 3315 (1657000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9282 Interval 3316 (1657500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9111 1 episodes - episode_reward: -918.110 [-918.110, -918.110] - loss: 218714.891 - mae: 8833.874 - mean_q: 11876.375 Interval 3317 (1658000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1051 Interval 3318 (1658500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5497 5 episodes - episode_reward: -255.419 [-397.689, 9.773] - loss: 183023.266 - mae: 8775.495 - mean_q: 11805.641 Interval 3319 (1659000 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -1.9931 3 episodes - episode_reward: -349.538 [-654.977, -185.850] - loss: 242873.062 - mae: 8894.665 - mean_q: 11960.929 Interval 3320 (1659500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9241 2 episodes - episode_reward: -404.883 [-502.317, -307.450] - loss: 217233.859 - mae: 8755.103 - mean_q: 11774.312 Interval 3321 (1660000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7174 2 episodes - episode_reward: -454.357 [-645.165, -263.549] - loss: 211292.891 - mae: 8691.294 - mean_q: 11684.898 Interval 3322 (1660500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.0638 1 episodes - episode_reward: -185.149 [-185.149, -185.149] - loss: 260099.141 - mae: 8696.444 - mean_q: 11680.310 Interval 3323 (1661000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4272 1 episodes - episode_reward: -172.689 [-172.689, -172.689] - loss: 206939.359 - mae: 8494.677 - mean_q: 11419.923 Interval 3324 (1661500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1255 4 episodes - episode_reward: -519.157 [-571.863, -480.212] - loss: 181210.500 - mae: 8490.972 - mean_q: 11412.793 Interval 3325 (1662000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0173 2 episodes - episode_reward: -189.912 [-296.868, -82.956] - loss: 193291.219 - mae: 8405.797 - mean_q: 11295.256 Interval 3326 (1662500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7777 3 episodes - episode_reward: -168.394 [-251.515, -119.858] - loss: 194184.438 - mae: 8244.253 - mean_q: 11075.917 Interval 3327 (1663000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1733 2 episodes - episode_reward: -226.701 [-229.899, -223.504] - loss: 209143.562 - mae: 8050.202 - mean_q: 
10819.406 Interval 3328 (1663500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0084 Interval 3329 (1664000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2417 2 episodes - episode_reward: -414.036 [-728.071, -100.000] - loss: 181328.172 - mae: 7869.186 - mean_q: 10597.089 Interval 3330 (1664500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2671 1 episodes - episode_reward: -550.017 [-550.017, -550.017] - loss: 148249.891 - mae: 8022.438 - mean_q: 10805.845 Interval 3331 (1665000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5808 3 episodes - episode_reward: -401.628 [-620.009, -100.000] - loss: 170837.250 - mae: 7989.846 - mean_q: 10755.930 Interval 3332 (1665500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5577 1 episodes - episode_reward: -274.226 [-274.226, -274.226] - loss: 170372.609 - mae: 7917.504 - mean_q: 10654.796 Interval 3333 (1666000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3852 3 episodes - episode_reward: -266.820 [-338.568, -147.185] - loss: 158205.625 - mae: 7943.383 - mean_q: 10689.011 Interval 3334 (1666500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0468 Interval 3335 (1667000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5779 1 episodes - episode_reward: -965.349 [-965.349, -965.349] - loss: 192827.438 - mae: 7834.146 - mean_q: 10529.235 Interval 3336 (1667500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9581 3 episodes - episode_reward: -405.994 [-651.546, -240.178] - loss: 164784.109 - mae: 7688.377 - mean_q: 10336.214 Interval 3337 (1668000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2788 Interval 3338 (1668500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.6536 4 episodes - episode_reward: -591.776 [-993.552, -111.222] - loss: 151304.297 - mae: 7687.792 - mean_q: 10345.773 Interval 3339 (1669000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8307 1 episodes - episode_reward: -367.608 [-367.608, -367.608] - loss: 129559.281 - mae: 7666.336 - mean_q: 10315.632 Interval 3340 (1669500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8037 2 episodes - episode_reward: -508.769 [-843.596, -173.941] - loss: 154531.938 - mae: 7715.086 - mean_q: 10388.411 Interval 3341 (1670000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7255 1 episodes - episode_reward: -1000.135 [-1000.135, -1000.135] - loss: 125487.391 - mae: 7631.946 - mean_q: 10279.610 Interval 3342 (1670500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9565 2 episodes - episode_reward: -486.773 [-686.308, -287.238] - loss: 119538.445 - mae: 7645.476 - mean_q: 10298.687 Interval 3343 (1671000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3525 Interval 3344 (1671500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8168 1 episodes - episode_reward: -997.523 [-997.523, -997.523] - loss: 153865.453 - mae: 7623.467 - mean_q: 10268.509 Interval 3345 (1672000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7162 2 episodes - episode_reward: -480.031 [-743.179, -216.883] - loss: 150171.547 - mae: 7554.533 - mean_q: 10181.456 Interval 3346 (1672500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7232 1 episodes - episode_reward: -934.671 [-934.671, -934.671] - loss: 127644.898 - mae: 7625.980 - mean_q: 10286.803 Interval 3347 (1673000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0078 1 
episodes - episode_reward: -567.912 [-567.912, -567.912] - loss: 127107.484 - mae: 7566.658 - mean_q: 10206.468 Interval 3348 (1673500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3573 3 episodes - episode_reward: -534.645 [-962.743, -230.177] - loss: 153182.484 - mae: 7603.202 - mean_q: 10250.685 Interval 3349 (1674000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3681 1 episodes - episode_reward: -453.618 [-453.618, -453.618] - loss: 144084.250 - mae: 7606.251 - mean_q: 10264.477 Interval 3350 (1674500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0956 7 episodes - episode_reward: -317.723 [-454.071, -121.455] - loss: 169625.547 - mae: 7625.443 - mean_q: 10289.963 Interval 3351 (1675000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1876 1 episodes - episode_reward: -984.824 [-984.824, -984.824] - loss: 168121.984 - mae: 7607.181 - mean_q: 10261.307 Interval 3352 (1675500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0902 2 episodes - episode_reward: -836.016 [-1212.631, -459.401] - loss: 184752.812 - mae: 7554.986 - mean_q: 10191.791 Interval 3353 (1676000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6087 2 episodes - episode_reward: -461.490 [-557.718, -365.262] - loss: 134071.406 - mae: 7528.978 - mean_q: 10156.925 Interval 3354 (1676500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0292 3 episodes - episode_reward: -592.341 [-629.828, -561.770] - loss: 154712.094 - mae: 7564.618 - mean_q: 10204.141 Interval 3355 (1677000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6121 3 episodes - episode_reward: -470.730 [-656.073, -159.762] - loss: 172185.281 - mae: 7514.164 - mean_q: 10140.794 Interval 3356 (1677500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -1.4311 1 episodes - episode_reward: -699.449 [-699.449, -699.449] - loss: 142233.203 - mae: 7498.775 - mean_q: 10118.729 Interval 3357 (1678000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1073 2 episodes - episode_reward: -474.757 [-490.331, -459.182] - loss: 169502.031 - mae: 7400.299 - mean_q: 9981.802 Interval 3358 (1678500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2052 4 episodes - episode_reward: -429.583 [-683.846, -155.957] - loss: 188136.875 - mae: 7504.253 - mean_q: 10119.254 Interval 3359 (1679000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4653 3 episodes - episode_reward: -571.486 [-635.960, -516.158] - loss: 199962.672 - mae: 7345.160 - mean_q: 9908.187 Interval 3360 (1679500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0778 4 episodes - episode_reward: -373.160 [-763.767, -100.000] - loss: 219313.609 - mae: 7280.490 - mean_q: 9830.769 Interval 3361 (1680000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8514 4 episodes - episode_reward: -562.991 [-1028.807, -293.611] - loss: 185732.391 - mae: 7250.117 - mean_q: 9780.504 Interval 3362 (1680500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0635 3 episodes - episode_reward: -745.102 [-1003.357, -541.900] - loss: 198379.734 - mae: 7114.193 - mean_q: 9589.255 Interval 3363 (1681000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1102 4 episodes - episode_reward: -399.115 [-541.180, -273.861] - loss: 164328.203 - mae: 6977.194 - mean_q: 9391.848 Interval 3364 (1681500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3597 3 episodes - episode_reward: -868.169 [-2240.635, -156.993] - loss: 158898.547 - mae: 6963.343 - mean_q: 9368.831 Interval 
3365 (1682000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.8178 3 episodes - episode_reward: -1332.128 [-1584.361, -850.002] - loss: 182171.844 - mae: 6827.270 - mean_q: 9179.209 Interval 3366 (1682500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.4497 5 episodes - episode_reward: -447.361 [-777.347, -259.070] - loss: 155664.484 - mae: 6759.216 - mean_q: 9105.136 Interval 3367 (1683000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.3411 5 episodes - episode_reward: -824.315 [-1510.675, -236.852] - loss: 168061.250 - mae: 6796.824 - mean_q: 9173.138 Interval 3368 (1683500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6150 5 episodes - episode_reward: -372.400 [-1121.887, -100.000] - loss: 165444.484 - mae: 7005.394 - mean_q: 9468.447 Interval 3369 (1684000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4785 4 episodes - episode_reward: -424.851 [-710.215, -164.106] - loss: 222358.266 - mae: 7278.176 - mean_q: 9841.966 Interval 3370 (1684500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5265 3 episodes - episode_reward: -416.576 [-773.712, -170.511] - loss: 241036.500 - mae: 7518.553 - mean_q: 10174.547 Interval 3371 (1685000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4636 5 episodes - episode_reward: -451.883 [-590.323, -174.236] - loss: 207143.844 - mae: 7832.678 - mean_q: 10619.665 Interval 3372 (1685500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.0245 6 episodes - episode_reward: -491.528 [-780.378, -105.624] - loss: 241923.297 - mae: 8271.070 - mean_q: 11223.631 Interval 3373 (1686000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.8318 5 episodes - episode_reward: -674.521 [-1124.029, -324.378] - loss: 292429.000 - 
mae: 8865.414 - mean_q: 12062.637 Interval 3374 (1686500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1463 5 episodes - episode_reward: -324.393 [-723.851, -110.379] - loss: 326344.250 - mae: 9459.461 - mean_q: 12853.165 Interval 3375 (1687000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6881 5 episodes - episode_reward: -272.344 [-400.729, -170.292] - loss: 299614.656 - mae: 10080.930 - mean_q: 13709.975 Interval 3376 (1687500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8697 4 episodes - episode_reward: -345.856 [-563.532, -159.808] - loss: 400595.875 - mae: 10971.472 - mean_q: 14938.526 Interval 3377 (1688000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3773 7 episodes - episode_reward: -325.021 [-607.801, -100.000] - loss: 441060.375 - mae: 11582.738 - mean_q: 15785.679 Interval 3378 (1688500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0754 3 episodes - episode_reward: -657.197 [-1045.578, -238.728] - loss: 492192.250 - mae: 12491.416 - mean_q: 17024.594 Interval 3379 (1689000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3680 7 episodes - episode_reward: -342.001 [-724.360, -93.168] - loss: 529247.625 - mae: 13216.469 - mean_q: 18019.555 Interval 3380 (1689500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3701 4 episodes - episode_reward: -498.123 [-833.865, -319.411] - loss: 602518.875 - mae: 13907.457 - mean_q: 18947.516 Interval 3381 (1690000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9064 2 episodes - episode_reward: -690.844 [-987.586, -394.102] - loss: 604609.000 - mae: 14383.434 - mean_q: 19560.469 Interval 3382 (1690500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1043 5 episodes - episode_reward: 
-432.646 [-735.092, -132.565] - loss: 642191.438 - mae: 14731.460 - mean_q: 20029.348 Interval 3383 (1691000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4944 Interval 3384 (1691500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.1934 4 episodes - episode_reward: -699.277 [-852.924, -586.294] - loss: 694299.312 - mae: 16035.527 - mean_q: 21820.861 Interval 3385 (1692000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1492 Interval 3386 (1692500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5278 2 episodes - episode_reward: -633.711 [-1135.658, -131.765] - loss: 773359.438 - mae: 17123.789 - mean_q: 23278.844 Interval 3387 (1693000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2218 Interval 3388 (1693500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.4560 3 episodes - episode_reward: -813.060 [-983.743, -709.463] - loss: 765197.750 - mae: 18338.596 - mean_q: 24930.689 Interval 3389 (1694000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0884 3 episodes - episode_reward: -350.535 [-679.047, -170.055] - loss: 720264.625 - mae: 18293.035 - mean_q: 24875.795 Interval 3390 (1694500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2790 1 episodes - episode_reward: -619.465 [-619.465, -619.465] - loss: 966409.188 - mae: 18502.199 - mean_q: 25130.574 Interval 3391 (1695000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4299 1 episodes - episode_reward: -600.652 [-600.652, -600.652] - loss: 715635.750 - mae: 18552.137 - mean_q: 25212.293 Interval 3392 (1695500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2035 2 episodes - episode_reward: -331.746 [-360.187, -303.305] - loss: 663192.438 - mae: 
19133.264 - mean_q: 26012.002 Interval 3393 (1696000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5391 1 episodes - episode_reward: -223.746 [-223.746, -223.746] - loss: 760371.625 - mae: 19098.660 - mean_q: 25975.268 Interval 3394 (1696500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.1024 Interval 3395 (1697000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2988 Interval 3396 (1697500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0553 Interval 3397 (1698000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8593 3 episodes - episode_reward: -283.322 [-447.361, -100.000] - loss: 806320.375 - mae: 21410.816 - mean_q: 29183.176 Interval 3398 (1698500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5374 1 episodes - episode_reward: -1031.687 [-1031.687, -1031.687] - loss: 1041296.750 - mae: 22360.521 - mean_q: 30465.379 Interval 3399 (1699000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2478 Interval 3400 (1699500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0227 Interval 3401 (1700000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1227 Interval 3402 (1700500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4696 1 episodes - episode_reward: -827.637 [-827.637, -827.637] - loss: 1644679.250 - mae: 24431.834 - mean_q: 33075.078 Interval 3403 (1701000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9019 3 episodes - episode_reward: -405.002 [-611.995, -108.820] - loss: 1894498.375 - mae: 24619.398 - mean_q: 33306.262 Interval 3404 (1701500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8430 1 episodes - episode_reward: 
-68.449 [-68.449, -68.449] - loss: 2312176.750 - mae: 24086.576 - mean_q: 32529.934 Interval 3405 (1702000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8503 2 episodes - episode_reward: -520.240 [-578.522, -461.957] - loss: 1058524.125 - mae: 23640.277 - mean_q: 31941.832 Interval 3406 (1702500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4027 3 episodes - episode_reward: -366.278 [-833.557, -62.392] - loss: 1316189.125 - mae: 23563.453 - mean_q: 31839.480 Interval 3407 (1703000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9679 2 episodes - episode_reward: -570.894 [-637.433, -504.355] - loss: 1007602.062 - mae: 23012.352 - mean_q: 31087.324 Interval 3408 (1703500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0655 1 episodes - episode_reward: -696.931 [-696.931, -696.931] - loss: 1006734.812 - mae: 22492.693 - mean_q: 30392.195 Interval 3409 (1704000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9797 2 episodes - episode_reward: -856.633 [-1580.252, -133.015] - loss: 780751.625 - mae: 22153.797 - mean_q: 29950.631 Interval 3410 (1704500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0821 2 episodes - episode_reward: -1522.903 [-2505.640, -540.167] - loss: 1080066.000 - mae: 21799.531 - mean_q: 29454.270 Interval 3411 (1705000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0927 2 episodes - episode_reward: -805.991 [-1012.584, -599.399] - loss: 1270239.125 - mae: 21416.053 - mean_q: 28961.693 Interval 3412 (1705500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1266 2 episodes - episode_reward: -263.834 [-392.134, -135.534] - loss: 862278.438 - mae: 21285.354 - mean_q: 28789.188 Interval 3413 (1706000 steps performed) 500/500 [==============================] - 
2s 4ms/step - reward: -1.4509 2 episodes - episode_reward: -233.475 [-368.037, -98.913] - loss: 627154.375 - mae: 21270.020 - mean_q: 28788.848 Interval 3414 (1706500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0489 1 episodes - episode_reward: -719.870 [-719.870, -719.870] - loss: 853867.438 - mae: 20843.232 - mean_q: 28219.328 Interval 3415 (1707000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6586 4 episodes - episode_reward: -249.051 [-594.235, -100.000] - loss: 685675.375 - mae: 20681.818 - mean_q: 28035.738 Interval 3416 (1707500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0972 1 episodes - episode_reward: -483.691 [-483.691, -483.691] - loss: 586959.562 - mae: 20988.840 - mean_q: 28471.373 Interval 3417 (1708000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4334 Interval 3418 (1708500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9818 2 episodes - episode_reward: -671.938 [-952.430, -391.446] - loss: 478518.781 - mae: 21847.490 - mean_q: 29652.404 Interval 3419 (1709000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.2885 Interval 3420 (1709500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.6849 1 episodes - episode_reward: -407.350 [-407.350, -407.350] - loss: 596416.812 - mae: 22896.344 - mean_q: 31096.693 Interval 3421 (1710000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1432 2 episodes - episode_reward: -258.292 [-292.673, -223.910] - loss: 614973.062 - mae: 23403.332 - mean_q: 31771.789 Interval 3422 (1710500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5874 1 episodes - episode_reward: -365.705 [-365.705, -365.705] - loss: 629818.250 - mae: 23885.879 - mean_q: 32419.711 Interval 3423 (1711000 steps 
performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1227 Interval 3424 (1711500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6094 4 episodes - episode_reward: -240.976 [-378.793, -106.975] - loss: 638714.875 - mae: 24783.574 - mean_q: 33603.004 Interval 3425 (1712000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2196 Interval 3426 (1712500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1232 Interval 3427 (1713000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1888 Interval 3428 (1713500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1127 Interval 3429 (1714000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1106 Interval 3430 (1714500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1211 Interval 3431 (1715000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.3970 Interval 3432 (1715500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2981 1 episodes - episode_reward: -802.895 [-802.895, -802.895] - loss: 561608.625 - mae: 24370.035 - mean_q: 32927.504 Interval 3433 (1716000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2025 Interval 3434 (1716500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5326 1 episodes - episode_reward: -323.180 [-323.180, -323.180] - loss: 699207.000 - mae: 24198.842 - mean_q: 32657.162 Interval 3435 (1717000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 0.0104 Interval 3436 (1717500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6395 1 episodes - episode_reward: -303.719 [-303.719, -303.719] - loss: 609921.062 - mae: 23529.650 - 
mean_q: 31725.902 Interval 3437 (1718000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4248 1 episodes - episode_reward: -231.399 [-231.399, -231.399] - loss: 502483.750 - mae: 23160.527 - mean_q: 31226.148 Interval 3438 (1718500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.3790 1 episodes - episode_reward: -238.280 [-238.280, -238.280] - loss: 528663.625 - mae: 22805.170 - mean_q: 30724.719 Interval 3439 (1719000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5398 1 episodes - episode_reward: -211.683 [-211.683, -211.683] - loss: 536267.188 - mae: 22418.596 - mean_q: 30184.977 Interval 3440 (1719500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5507 Interval 3441 (1720000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3311 1 episodes - episode_reward: -1501.643 [-1501.643, -1501.643] - loss: 647287.500 - mae: 21751.043 - mean_q: 29256.006 Interval 3442 (1720500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1499 Interval 3443 (1721000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3936 Interval 3444 (1721500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.7430 Interval 3445 (1722000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5797 3 episodes - episode_reward: -1053.368 [-2919.220, -80.205] - loss: 394436.719 - mae: 20344.404 - mean_q: 27358.592 Interval 3446 (1722500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3076 2 episodes - episode_reward: -990.039 [-1650.233, -329.846] - loss: 459214.562 - mae: 20043.656 - mean_q: 26951.570 Interval 3447 (1723000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3238 Interval 3448 (1723500 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: -0.3741 2 episodes - episode_reward: -195.162 [-281.331, -108.993] - loss: 503541.562 - mae: 19735.141 - mean_q: 26529.162 Interval 3449 (1724000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2171 1 episodes - episode_reward: -507.497 [-507.497, -507.497] - loss: 484377.250 - mae: 19484.059 - mean_q: 26197.760 Interval 3450 (1724500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0485 Interval 3451 (1725000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1376 Interval 3452 (1725500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1570 Interval 3453 (1726000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.3011 Interval 3454 (1726500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.9780 Interval 3455 (1727000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1666 Interval 3456 (1727500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1770 1 episodes - episode_reward: -1447.816 [-1447.816, -1447.816] - loss: 515008.031 - mae: 18894.316 - mean_q: 25419.043 Interval 3457 (1728000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3660 Interval 3458 (1728500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9312 1 episodes - episode_reward: -1485.202 [-1485.202, -1485.202] - loss: 661835.750 - mae: 19181.832 - mean_q: 25792.895 Interval 3459 (1729000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3864 1 episodes - episode_reward: -360.879 [-360.879, -360.879] - loss: 564195.625 - mae: 18834.750 - mean_q: 25319.701 Interval 3460 (1729500 steps performed) 500/500 [==============================] - 3s 7ms/step - 
reward: -0.0914 Interval 3461 (1730000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7037 1 episodes - episode_reward: -543.668 [-543.668, -543.668] - loss: 587771.188 - mae: 19331.357 - mean_q: 26008.389 Interval 3462 (1730500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3132 1 episodes - episode_reward: -451.946 [-451.946, -451.946] - loss: 498474.562 - mae: 19432.354 - mean_q: 26147.623 Interval 3463 (1731000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.8929 1 episodes - episode_reward: -1545.413 [-1545.413, -1545.413] - loss: 592966.312 - mae: 19359.732 - mean_q: 26047.426 Interval 3464 (1731500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1326 Interval 3465 (1732000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1166 Interval 3466 (1732500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3175 Interval 3467 (1733000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1073 1 episodes - episode_reward: -677.842 [-677.842, -677.842] - loss: 521887.750 - mae: 19864.662 - mean_q: 26729.152 Interval 3468 (1733500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6397 Interval 3469 (1734000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4108 2 episodes - episode_reward: -606.882 [-1095.998, -117.766] - loss: 514486.875 - mae: 20134.227 - mean_q: 27113.299 Interval 3470 (1734500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2811 4 episodes - episode_reward: -153.919 [-271.925, -107.656] - loss: 489398.125 - mae: 20451.818 - mean_q: 27528.250 Interval 3471 (1735000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4306 Interval 3472 (1735500 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: -0.9207 2 episodes - episode_reward: -247.045 [-355.781, -138.309] - loss: 519347.500 - mae: 20635.527 - mean_q: 27785.141 Interval 3473 (1736000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1186 Interval 3474 (1736500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2326 Interval 3475 (1737000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1536 Interval 3476 (1737500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1884 Interval 3477 (1738000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1878 Interval 3478 (1738500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.1370 Interval 3479 (1739000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.8038 Interval 3480 (1739500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -1.5273 1 episodes - episode_reward: -1725.174 [-1725.174, -1725.174] - loss: 512621.469 - mae: 20628.072 - mean_q: 27753.801 Interval 3481 (1740000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3640 Interval 3482 (1740500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8656 Interval 3483 (1741000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5178 1 episodes - episode_reward: -1167.064 [-1167.064, -1167.064] - loss: 399711.688 - mae: 20062.258 - mean_q: 26993.666 Interval 3484 (1741500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6864 1 episodes - episode_reward: -996.556 [-996.556, -996.556] - loss: 447689.562 - mae: 20031.471 - mean_q: 26943.986 Interval 3485 (1742000 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -1.2045 Interval 3486 (1742500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6434 2 episodes - episode_reward: -589.181 [-1078.362, -100.000] - loss: 389349.844 - mae: 19606.629 - mean_q: 26367.891 Interval 3487 (1743000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9074 1 episodes - episode_reward: -1253.154 [-1253.154, -1253.154] - loss: 409011.281 - mae: 19388.184 - mean_q: 26062.744 Interval 3488 (1743500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5633 Interval 3489 (1744000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6312 Interval 3490 (1744500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8381 2 episodes - episode_reward: -705.100 [-749.183, -661.017] - loss: 334652.469 - mae: 18933.289 - mean_q: 25450.221 Interval 3491 (1745000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6773 1 episodes - episode_reward: -1130.213 [-1130.213, -1130.213] - loss: 336290.469 - mae: 18658.990 - mean_q: 25074.357 Interval 3492 (1745500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4784 2 episodes - episode_reward: -438.513 [-776.601, -100.426] - loss: 441933.312 - mae: 18503.143 - mean_q: 24852.652 Interval 3493 (1746000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0279 Interval 3494 (1746500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9802 1 episodes - episode_reward: -1065.987 [-1065.987, -1065.987] - loss: 431190.375 - mae: 18256.275 - mean_q: 24529.322 Interval 3495 (1747000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6909 1 episodes - episode_reward: -298.376 [-298.376, -298.376] - loss: 435804.031 - mae: 18090.254 - mean_q: 24319.461 Interval 3496 (1747500 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8880 Interval 3497 (1748000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4555 2 episodes - episode_reward: -627.507 [-656.786, -598.229] - loss: 390221.250 - mae: 17765.637 - mean_q: 23881.295 Interval 3498 (1748500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1669 1 episodes - episode_reward: -560.266 [-560.266, -560.266] - loss: 354129.250 - mae: 17713.193 - mean_q: 23819.514 Interval 3499 (1749000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9810 1 episodes - episode_reward: -608.718 [-608.718, -608.718] - loss: 356178.875 - mae: 17556.578 - mean_q: 23602.559 Interval 3500 (1749500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6896 Interval 3501 (1750000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8497 1 episodes - episode_reward: -474.783 [-474.783, -474.783] - loss: 365667.750 - mae: 17500.930 - mean_q: 23519.395 Interval 3502 (1750500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4376 6 episodes - episode_reward: -312.262 [-514.398, -100.000] - loss: 397548.094 - mae: 17654.834 - mean_q: 23730.270 Interval 3503 (1751000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2333 4 episodes - episode_reward: -268.756 [-447.568, -100.000] - loss: 340178.844 - mae: 17559.115 - mean_q: 23576.031 Interval 3504 (1751500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0147 Interval 3505 (1752000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2126 2 episodes - episode_reward: -427.279 [-718.770, -135.788] - loss: 351494.844 - mae: 17797.307 - mean_q: 23884.080 Interval 3506 (1752500 steps performed) 500/500 [==============================] - 3s 
6ms/step - reward: -0.5189 1 episodes - episode_reward: -139.575 [-139.575, -139.575] - loss: 373610.656 - mae: 18022.381 - mean_q: 24192.055 Interval 3507 (1753000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1991 2 episodes - episode_reward: -335.904 [-410.561, -261.247] - loss: 448073.094 - mae: 18214.027 - mean_q: 24450.486 Interval 3508 (1753500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4105 1 episodes - episode_reward: -450.334 [-450.334, -450.334] - loss: 452608.344 - mae: 18361.059 - mean_q: 24658.420 Interval 3509 (1754000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8240 3 episodes - episode_reward: -214.284 [-402.995, -111.077] - loss: 527318.000 - mae: 18443.369 - mean_q: 24780.012 Interval 3510 (1754500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7355 2 episodes - episode_reward: -218.662 [-314.315, -123.009] - loss: 500187.469 - mae: 18618.352 - mean_q: 25035.604 Interval 3511 (1755000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9358 2 episodes - episode_reward: -202.925 [-294.061, -111.789] - loss: 473368.312 - mae: 18760.029 - mean_q: 25217.795 Interval 3512 (1755500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5752 1 episodes - episode_reward: -307.630 [-307.630, -307.630] - loss: 618681.125 - mae: 18807.297 - mean_q: 25305.660 Interval 3513 (1756000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7165 2 episodes - episode_reward: -390.213 [-628.912, -151.515] - loss: 477000.781 - mae: 18984.383 - mean_q: 25574.703 Interval 3514 (1756500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8709 2 episodes - episode_reward: -215.816 [-226.868, -204.764] - loss: 492204.438 - mae: 18997.148 - mean_q: 25610.533 Interval 3515 (1757000 steps 
performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8314 5 episodes - episode_reward: -191.368 [-255.821, -107.783] - loss: 511289.031 - mae: 19206.404 - mean_q: 25908.516 Interval 3516 (1757500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0331 Interval 3517 (1758000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1739 Interval 3518 (1758500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2813 Interval 3519 (1759000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5706 1 episodes - episode_reward: -524.007 [-524.007, -524.007] - loss: 612383.312 - mae: 19867.123 - mean_q: 26893.082 Interval 3520 (1759500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9792 2 episodes - episode_reward: -254.296 [-266.617, -241.975] - loss: 471005.344 - mae: 20123.533 - mean_q: 27263.473 Interval 3521 (1760000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5707 1 episodes - episode_reward: -272.943 [-272.943, -272.943] - loss: 645954.375 - mae: 20421.023 - mean_q: 27668.666 Interval 3522 (1760500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0423 2 episodes - episode_reward: -164.911 [-225.937, -103.885] - loss: 655658.438 - mae: 20597.729 - mean_q: 27900.000 Interval 3523 (1761000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1674 3 episodes - episode_reward: -242.736 [-308.219, -185.706] - loss: 800645.312 - mae: 20713.330 - mean_q: 28048.652 Interval 3524 (1761500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3397 Interval 3525 (1762000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4407 1 episodes - episode_reward: -926.874 [-926.874, -926.874] - loss: 554364.625 - mae: 20999.738 - 
mean_q: 28399.322 Interval 3526 (1762500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7953 1 episodes - episode_reward: -345.594 [-345.594, -345.594] - loss: 645792.125 - mae: 21215.312 - mean_q: 28656.307 Interval 3527 (1763000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7413 1 episodes - episode_reward: -275.561 [-275.561, -275.561] - loss: 608839.562 - mae: 21200.494 - mean_q: 28630.344 Interval 3528 (1763500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3347 4 episodes - episode_reward: -590.433 [-914.384, -100.000] - loss: 868875.688 - mae: 20809.783 - mean_q: 28068.578 Interval 3529 (1764000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1984 Interval 3530 (1764500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1252 Interval 3531 (1765000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1531 Interval 3532 (1765500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1933 Interval 3533 (1766000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1508 Interval 3534 (1766500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.1793 Interval 3535 (1767000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1436 Interval 3536 (1767500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1764 Interval 3537 (1768000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.8756 Interval 3538 (1768500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -1.3015 1 episodes - episode_reward: -1637.975 [-1637.975, -1637.975] - loss: 439462.219 - mae: 17365.111 - mean_q: 23268.484 Interval 3539 (1769000 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: -1.7687 1 episodes - episode_reward: -855.504 [-855.504, -855.504] - loss: 396332.688 - mae: 16809.736 - mean_q: 22521.082 Interval 3540 (1769500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1816 2 episodes - episode_reward: -473.753 [-476.995, -470.510] - loss: 387863.531 - mae: 16406.639 - mean_q: 21968.496 Interval 3541 (1770000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4806 2 episodes - episode_reward: -241.513 [-335.894, -147.131] - loss: 352487.906 - mae: 16001.473 - mean_q: 21424.719 Interval 3542 (1770500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7629 2 episodes - episode_reward: -139.891 [-179.782, -100.000] - loss: 310836.500 - mae: 15785.567 - mean_q: 21130.859 Interval 3543 (1771000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3333 Interval 3544 (1771500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1406 Interval 3545 (1772000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1392 Interval 3546 (1772500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7674 2 episodes - episode_reward: -569.985 [-717.151, -422.819] - loss: 276117.219 - mae: 14705.642 - mean_q: 19693.025 Interval 3547 (1773000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2004 Interval 3548 (1773500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0090 Interval 3549 (1774000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3765 1 episodes - episode_reward: -300.830 [-300.830, -300.830] - loss: 287322.406 - mae: 14070.757 - mean_q: 18863.145 Interval 3550 (1774500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
-0.3643 1 episodes - episode_reward: -199.389 [-199.389, -199.389] - loss: 165600.562 - mae: 13786.977 - mean_q: 18488.652 Interval 3551 (1775000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2873 Interval 3552 (1775500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3135 1 episodes - episode_reward: -257.444 [-257.444, -257.444] - loss: 186950.547 - mae: 13444.892 - mean_q: 18016.314 Interval 3553 (1776000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4225 1 episodes - episode_reward: -255.741 [-255.741, -255.741] - loss: 182114.875 - mae: 13143.718 - mean_q: 17625.488 Interval 3554 (1776500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1126 Interval 3555 (1777000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2543 2 episodes - episode_reward: -335.921 [-514.492, -157.350] - loss: 160049.562 - mae: 12777.179 - mean_q: 17134.484 Interval 3556 (1777500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1758 Interval 3557 (1778000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7995 2 episodes - episode_reward: -239.610 [-251.177, -228.044] - loss: 158177.500 - mae: 12426.731 - mean_q: 16664.299 Interval 3558 (1778500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2171 2 episodes - episode_reward: -250.071 [-367.432, -132.709] - loss: 142052.266 - mae: 12003.027 - mean_q: 16096.195 Interval 3559 (1779000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2842 1 episodes - episode_reward: -315.184 [-315.184, -315.184] - loss: 128686.023 - mae: 11773.168 - mean_q: 15790.728 Interval 3560 (1779500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2433 Interval 3561 (1780000 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -0.1177 Interval 3562 (1780500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1753 Interval 3563 (1781000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.5361 1 episodes - episode_reward: -997.442 [-997.442, -997.442] - loss: 111049.688 - mae: 10870.489 - mean_q: 14569.181 Interval 3564 (1781500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8883 1 episodes - episode_reward: -1332.631 [-1332.631, -1332.631] - loss: 110269.953 - mae: 10580.295 - mean_q: 14174.611 Interval 3565 (1782000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4164 2 episodes - episode_reward: -1429.538 [-1821.781, -1037.296] - loss: 113753.703 - mae: 10203.279 - mean_q: 13672.595 Interval 3566 (1782500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.5354 1 episodes - episode_reward: -1915.967 [-1915.967, -1915.967] - loss: 101577.312 - mae: 9937.566 - mean_q: 13320.614 Interval 3567 (1783000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4007 1 episodes - episode_reward: -1859.442 [-1859.442, -1859.442] - loss: 106127.906 - mae: 9621.395 - mean_q: 12886.002 Interval 3568 (1783500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.6416 1 episodes - episode_reward: -1762.728 [-1762.728, -1762.728] - loss: 94605.555 - mae: 9348.675 - mean_q: 12512.498 Interval 3569 (1784000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.4369 1 episodes - episode_reward: -2064.458 [-2064.458, -2064.458] - loss: 92860.391 - mae: 9125.862 - mean_q: 12209.473 Interval 3570 (1784500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.3765 2 episodes - episode_reward: -1770.871 [-1817.767, -1723.974] - loss: 79362.711 - mae: 8819.319 - 
mean_q: 11801.434 Interval 3571 (1785000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.6254 1 episodes - episode_reward: -1809.830 [-1809.830, -1809.830] - loss: 82952.852 - mae: 8481.302 - mean_q: 11340.376 Interval 3572 (1785500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.9937 3 episodes - episode_reward: -717.848 [-1709.981, -106.748] - loss: 70091.117 - mae: 8197.561 - mean_q: 10959.010 Interval 3573 (1786000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.8854 2 episodes - episode_reward: -1090.064 [-1649.703, -530.425] - loss: 75065.766 - mae: 7896.899 - mean_q: 10551.248 Interval 3574 (1786500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3884 2 episodes - episode_reward: -981.973 [-1794.537, -169.408] - loss: 77403.375 - mae: 7612.667 - mean_q: 10162.676 Interval 3575 (1787000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6803 2 episodes - episode_reward: -798.264 [-1200.779, -395.749] - loss: 68262.719 - mae: 7388.000 - mean_q: 9860.877 Interval 3576 (1787500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4346 1 episodes - episode_reward: -687.072 [-687.072, -687.072] - loss: 74858.859 - mae: 7130.074 - mean_q: 9508.056 Interval 3577 (1788000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2436 2 episodes - episode_reward: -361.520 [-375.092, -347.948] - loss: 65393.602 - mae: 6887.121 - mean_q: 9189.166 Interval 3578 (1788500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4669 2 episodes - episode_reward: -299.799 [-336.881, -262.717] - loss: 73824.781 - mae: 6640.548 - mean_q: 8846.993 Interval 3579 (1789000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0459 2 episodes - episode_reward: -342.229 [-447.633, 
-236.826] - loss: 71821.766 - mae: 6475.023 - mean_q: 8618.551 Interval 3580 (1789500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0913 2 episodes - episode_reward: -250.000 [-400.000, -100.000] - loss: 63853.254 - mae: 6212.863 - mean_q: 8265.327 Interval 3581 (1790000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8225 1 episodes - episode_reward: -519.634 [-519.634, -519.634] - loss: 65499.527 - mae: 5951.997 - mean_q: 7916.629 Interval 3582 (1790500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8744 1 episodes - episode_reward: -342.632 [-342.632, -342.632] - loss: 65430.648 - mae: 5838.686 - mean_q: 7771.647 Interval 3583 (1791000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9364 1 episodes - episode_reward: -460.464 [-460.464, -460.464] - loss: 57765.754 - mae: 5651.561 - mean_q: 7525.975 Interval 3584 (1791500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4786 2 episodes - episode_reward: -483.548 [-626.074, -341.021] - loss: 65501.898 - mae: 5446.380 - mean_q: 7251.759 Interval 3585 (1792000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8165 3 episodes - episode_reward: -405.830 [-555.629, -261.024] - loss: 62127.312 - mae: 5312.588 - mean_q: 7075.614 Interval 3586 (1792500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1403 Interval 3587 (1793000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3024 Interval 3588 (1793500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0687 Interval 3589 (1794000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7930 1 episodes - episode_reward: -701.773 [-701.773, -701.773] - loss: 68913.906 - mae: 4810.878 - mean_q: 6412.617 Interval 3590 (1794500 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0432 Interval 3591 (1795000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4499 Interval 3592 (1795500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2703 1 episodes - episode_reward: -769.567 [-769.567, -769.567] - loss: 68362.062 - mae: 4616.111 - mean_q: 6156.469 Interval 3593 (1796000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0467 Interval 3594 (1796500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4068 1 episodes - episode_reward: -742.949 [-742.949, -742.949] - loss: 61989.434 - mae: 4498.813 - mean_q: 6025.275 Interval 3595 (1797000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7551 1 episodes - episode_reward: -469.344 [-469.344, -469.344] - loss: 65909.266 - mae: 4525.153 - mean_q: 6050.303 Interval 3596 (1797500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1458 Interval 3597 (1798000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4235 3 episodes - episode_reward: -250.559 [-345.441, -100.000] - loss: 58038.980 - mae: 4480.208 - mean_q: 5991.946 Interval 3598 (1798500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5818 Interval 3599 (1799000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6861 3 episodes - episode_reward: -539.950 [-755.725, -142.481] - loss: 64464.332 - mae: 4415.872 - mean_q: 5901.600 Interval 3600 (1799500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2765 1 episodes - episode_reward: -531.988 [-531.988, -531.988] - loss: 63654.602 - mae: 4373.768 - mean_q: 5846.020 Interval 3601 (1800000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 
-2.1294 2 episodes - episode_reward: -534.670 [-802.126, -267.213] - loss: 60468.906 - mae: 4371.683 - mean_q: 5835.820 Interval 3602 (1800500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0250 Interval 3603 (1801000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1932 Interval 3604 (1801500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2781 1 episodes - episode_reward: -707.669 [-707.669, -707.669] - loss: 54734.125 - mae: 4221.851 - mean_q: 5639.074 Interval 3605 (1802000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1152 Interval 3606 (1802500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3686 2 episodes - episode_reward: -537.286 [-539.967, -534.606] - loss: 61279.348 - mae: 4164.789 - mean_q: 5554.462 Interval 3607 (1803000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7782 2 episodes - episode_reward: -517.740 [-554.468, -481.012] - loss: 49085.766 - mae: 4143.916 - mean_q: 5535.387 Interval 3608 (1803500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0827 5 episodes - episode_reward: -297.686 [-575.328, -100.000] - loss: 47307.309 - mae: 4086.690 - mean_q: 5457.317 Interval 3609 (1804000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8223 1 episodes - episode_reward: -461.413 [-461.413, -461.413] - loss: 51667.176 - mae: 4014.707 - mean_q: 5354.949 Interval 3610 (1804500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9207 1 episodes - episode_reward: -475.967 [-475.967, -475.967] - loss: 55488.566 - mae: 4051.113 - mean_q: 5404.376 Interval 3611 (1805000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9263 3 episodes - episode_reward: -478.429 [-510.955, -439.862] - loss: 47733.445 - 
mae: 3958.140 - mean_q: 5276.775 Interval 3612 (1805500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5521 3 episodes - episode_reward: -407.790 [-589.613, -272.805] - loss: 56945.859 - mae: 3940.690 - mean_q: 5250.538 Interval 3613 (1806000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1359 3 episodes - episode_reward: -217.666 [-267.403, -192.596] - loss: 54686.617 - mae: 3875.157 - mean_q: 5160.125 Interval 3614 (1806500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7802 2 episodes - episode_reward: -663.579 [-825.059, -502.099] - loss: 43503.426 - mae: 3775.597 - mean_q: 5024.436 Interval 3615 (1807000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2249 3 episodes - episode_reward: -393.061 [-545.562, -148.277] - loss: 44460.477 - mae: 3771.371 - mean_q: 5020.703 Interval 3616 (1807500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1742 Interval 3617 (1808000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3396 3 episodes - episode_reward: -384.354 [-556.951, -156.961] - loss: 49237.637 - mae: 3638.442 - mean_q: 4832.192 Interval 3618 (1808500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2160 3 episodes - episode_reward: -406.624 [-608.674, -272.733] - loss: 45292.180 - mae: 3571.171 - mean_q: 4738.712 Interval 3619 (1809000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8039 4 episodes - episode_reward: -427.436 [-529.260, -200.354] - loss: 44915.789 - mae: 3470.116 - mean_q: 4597.720 Interval 3620 (1809500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9255 5 episodes - episode_reward: -409.368 [-497.394, -201.555] - loss: 38713.266 - mae: 3381.107 - mean_q: 4475.133 Interval 3621 (1810000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.3514 4 episodes - episode_reward: -444.759 [-649.187, -285.151] - loss: 38138.512 - mae: 3309.832 - mean_q: 4378.549 Interval 3622 (1810500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0001 3 episodes - episode_reward: -322.708 [-372.582, -278.582] - loss: 39420.152 - mae: 3290.616 - mean_q: 4354.450 Interval 3623 (1811000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2249 4 episodes - episode_reward: -372.758 [-468.076, -148.098] - loss: 42331.734 - mae: 3237.368 - mean_q: 4281.072 Interval 3624 (1811500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2508 1 episodes - episode_reward: -659.312 [-659.312, -659.312] - loss: 38377.309 - mae: 3187.188 - mean_q: 4211.180 Interval 3625 (1812000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7591 5 episodes - episode_reward: -483.856 [-851.058, -199.449] - loss: 41606.648 - mae: 3120.404 - mean_q: 4119.879 Interval 3626 (1812500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0187 4 episodes - episode_reward: -400.032 [-621.455, -143.122] - loss: 43724.562 - mae: 3123.978 - mean_q: 4126.381 Interval 3627 (1813000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4495 3 episodes - episode_reward: -435.625 [-542.556, -293.182] - loss: 40837.422 - mae: 3042.967 - mean_q: 4017.847 Interval 3628 (1813500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3155 2 episodes - episode_reward: -316.928 [-453.016, -180.840] - loss: 40109.746 - mae: 3035.716 - mean_q: 4010.697 Interval 3629 (1814000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3131 Interval 3630 (1814500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3139 5 episodes - 
episode_reward: -259.879 [-455.154, -100.000] - loss: 43547.262 - mae: 2937.579 - mean_q: 3879.725 Interval 3631 (1815000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6094 3 episodes - episode_reward: -409.683 [-545.748, -209.944] - loss: 43550.629 - mae: 2936.319 - mean_q: 3876.301 Interval 3632 (1815500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3966 1 episodes - episode_reward: -191.176 [-191.176, -191.176] - loss: 43596.227 - mae: 2839.540 - mean_q: 3741.545 Interval 3633 (1816000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9802 2 episodes - episode_reward: -525.358 [-661.749, -388.968] - loss: 44482.711 - mae: 2825.534 - mean_q: 3725.965 Interval 3634 (1816500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7106 4 episodes - episode_reward: -455.501 [-575.998, -270.407] - loss: 41340.199 - mae: 2747.501 - mean_q: 3620.463 Interval 3635 (1817000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5555 3 episodes - episode_reward: -423.671 [-590.834, -265.573] - loss: 38851.512 - mae: 2731.508 - mean_q: 3602.585 Interval 3636 (1817500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7409 1 episodes - episode_reward: -718.193 [-718.193, -718.193] - loss: 38020.668 - mae: 2635.490 - mean_q: 3473.909 Interval 3637 (1818000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0563 5 episodes - episode_reward: -445.926 [-1059.709, -113.836] - loss: 37066.195 - mae: 2593.538 - mean_q: 3419.623 Interval 3638 (1818500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.2464 4 episodes - episode_reward: -522.387 [-661.235, -350.366] - loss: 39219.336 - mae: 2527.804 - mean_q: 3331.754 Interval 3639 (1819000 steps performed) 500/500 [==============================] - 2s 5ms/step - 
reward: -2.8548 4 episodes - episode_reward: -359.615 [-620.891, -193.845] - loss: 35638.047 - mae: 2494.187 - mean_q: 3287.774 Interval 3640 (1819500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5113 3 episodes - episode_reward: -729.769 [-847.747, -571.855] - loss: 32252.148 - mae: 2385.210 - mean_q: 3143.238 Interval 3641 (1820000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4252 6 episodes - episode_reward: -380.863 [-714.466, -111.656] - loss: 30052.500 - mae: 2337.497 - mean_q: 3077.823 Interval 3642 (1820500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5297 Interval 3643 (1821000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5417 2 episodes - episode_reward: -622.357 [-1026.901, -217.813] - loss: 28530.699 - mae: 2180.705 - mean_q: 2866.253 Interval 3644 (1821500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6179 4 episodes - episode_reward: -388.842 [-690.075, -179.860] - loss: 26509.588 - mae: 2086.197 - mean_q: 2735.525 Interval 3645 (1822000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2430 2 episodes - episode_reward: -833.773 [-1155.089, -512.457] - loss: 26747.473 - mae: 1998.927 - mean_q: 2618.604 Interval 3646 (1822500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.8955 3 episodes - episode_reward: -560.726 [-984.577, -258.722] - loss: 27051.572 - mae: 1950.431 - mean_q: 2557.848 Interval 3647 (1823000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3634 5 episodes - episode_reward: -527.739 [-840.181, -346.615] - loss: 23205.662 - mae: 1820.905 - mean_q: 2381.673 Interval 3648 (1823500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3050 4 episodes - episode_reward: -724.498 [-894.632, -622.191] - loss: 
22058.131 - mae: 1744.728 - mean_q: 2279.324 Interval 3649 (1824000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8051 1 episodes - episode_reward: -713.479 [-713.479, -713.479] - loss: 20259.178 - mae: 1654.330 - mean_q: 2156.046 Interval 3650 (1824500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3056 3 episodes - episode_reward: -656.723 [-834.252, -396.913] - loss: 20237.938 - mae: 1627.996 - mean_q: 2122.748 Interval 3651 (1825000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3715 3 episodes - episode_reward: -704.110 [-993.697, -550.284] - loss: 19708.379 - mae: 1521.120 - mean_q: 1975.124 Interval 3652 (1825500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5799 2 episodes - episode_reward: -753.022 [-764.738, -741.307] - loss: 16834.672 - mae: 1455.385 - mean_q: 1887.902 Interval 3653 (1826000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8038 7 episodes - episode_reward: -453.165 [-668.008, -116.778] - loss: 16693.297 - mae: 1401.466 - mean_q: 1814.978 Interval 3654 (1826500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6009 2 episodes - episode_reward: -619.804 [-886.417, -353.191] - loss: 16271.463 - mae: 1313.895 - mean_q: 1695.031 Interval 3655 (1827000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5648 2 episodes - episode_reward: -860.402 [-968.807, -751.996] - loss: 15656.691 - mae: 1272.950 - mean_q: 1639.753 Interval 3656 (1827500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9081 7 episodes - episode_reward: -325.193 [-567.979, -89.922] - loss: 14516.133 - mae: 1177.739 - mean_q: 1508.398 Interval 3657 (1828000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5335 5 episodes - episode_reward: -700.927 
[-1240.748, -367.989] - loss: 13378.424 - mae: 1138.142 - mean_q: 1453.971 Interval 3658 (1828500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2476 4 episodes - episode_reward: -573.011 [-780.556, -378.531] - loss: 13039.402 - mae: 1096.687 - mean_q: 1400.168 Interval 3659 (1829000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3756 4 episodes - episode_reward: -519.200 [-1208.091, -152.289] - loss: 12789.432 - mae: 1021.781 - mean_q: 1297.271 Interval 3660 (1829500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9205 2 episodes - episode_reward: -845.676 [-1054.437, -636.915] - loss: 13709.907 - mae: 972.897 - mean_q: 1227.896 Interval 3661 (1830000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.2292 9 episodes - episode_reward: -430.689 [-929.652, -103.560] - loss: 11469.988 - mae: 919.624 - mean_q: 1158.241 Interval 3662 (1830500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0039 3 episodes - episode_reward: -491.722 [-560.134, -423.002] - loss: 11832.647 - mae: 869.434 - mean_q: 1089.237 Interval 3663 (1831000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1715 5 episodes - episode_reward: -323.254 [-525.638, -122.692] - loss: 11987.814 - mae: 844.101 - mean_q: 1050.882 Interval 3664 (1831500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9961 4 episodes - episode_reward: -498.799 [-730.065, -325.926] - loss: 12138.165 - mae: 822.141 - mean_q: 1020.388 Interval 3665 (1832000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6999 5 episodes - episode_reward: -399.895 [-497.483, -346.976] - loss: 11128.622 - mae: 795.366 - mean_q: 984.779 Interval 3666 (1832500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0540 5 episodes - 
episode_reward: -620.431 [-1547.829, -100.000] - loss: 9900.284 - mae: 759.511 - mean_q: 935.899 Interval 3667 (1833000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6624 3 episodes - episode_reward: -579.289 [-857.414, -268.646] - loss: 10001.308 - mae: 735.214 - mean_q: 903.345 Interval 3668 (1833500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3624 5 episodes - episode_reward: -408.097 [-601.256, -125.321] - loss: 10375.574 - mae: 712.647 - mean_q: 870.622 Interval 3669 (1834000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0159 1 episodes - episode_reward: -413.545 [-413.545, -413.545] - loss: 9529.903 - mae: 666.144 - mean_q: 807.617 Interval 3670 (1834500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.0341 6 episodes - episode_reward: -355.392 [-536.014, -140.727] - loss: 9278.930 - mae: 665.803 - mean_q: 807.109 Interval 3671 (1835000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0480 4 episodes - episode_reward: -629.055 [-1312.508, -290.540] - loss: 8369.214 - mae: 651.157 - mean_q: 785.219 Interval 3672 (1835500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3223 5 episodes - episode_reward: -397.772 [-757.042, -150.728] - loss: 8574.238 - mae: 630.540 - mean_q: 758.588 Interval 3673 (1836000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9400 6 episodes - episode_reward: -447.500 [-551.386, -263.822] - loss: 8672.076 - mae: 610.560 - mean_q: 729.821 Interval 3674 (1836500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2823 7 episodes - episode_reward: -370.173 [-686.549, -180.719] - loss: 8173.074 - mae: 590.570 - mean_q: 703.067 Interval 3675 (1837000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0062 5 episodes 
- episode_reward: -479.087 [-588.708, -393.510] - loss: 9031.948 - mae: 586.894 - mean_q: 698.065 Interval 3676 (1837500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7823 8 episodes - episode_reward: -349.374 [-523.623, -244.029] - loss: 8527.764 - mae: 569.000 - mean_q: 674.098 Interval 3677 (1838000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.5868 6 episodes - episode_reward: -551.163 [-1021.070, -96.765] - loss: 9346.437 - mae: 565.697 - mean_q: 669.200 Interval 3678 (1838500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9787 5 episodes - episode_reward: -639.187 [-1523.635, -253.752] - loss: 9202.857 - mae: 550.817 - mean_q: 646.260 Interval 3679 (1839000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8806 6 episodes - episode_reward: -401.834 [-584.075, -160.930] - loss: 8555.958 - mae: 532.028 - mean_q: 619.379 Interval 3680 (1839500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6692 7 episodes - episode_reward: -407.922 [-695.662, -103.325] - loss: 8276.538 - mae: 535.755 - mean_q: 627.390 Interval 3681 (1840000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0331 5 episodes - episode_reward: -455.295 [-593.568, -296.612] - loss: 8263.038 - mae: 520.939 - mean_q: 604.069 Interval 3682 (1840500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4920 5 episodes - episode_reward: -546.926 [-1016.712, -352.284] - loss: 7989.548 - mae: 511.392 - mean_q: 591.518 Interval 3683 (1841000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5861 6 episodes - episode_reward: -393.615 [-835.944, -143.458] - loss: 7452.253 - mae: 493.276 - mean_q: 564.966 Interval 3684 (1841500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4509 7 episodes 
- episode_reward: -348.630 [-606.249, -129.894] - loss: 7370.629 - mae: 494.698 - mean_q: 566.394 Interval 3685 (1842000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2448 2 episodes - episode_reward: -1004.170 [-1194.480, -813.860] - loss: 7358.598 - mae: 475.637 - mean_q: 541.989 Interval 3686 (1842500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -7.6498 3 episodes - episode_reward: -1249.140 [-2973.598, -140.115] - loss: 6949.473 - mae: 456.924 - mean_q: 514.381 Interval 3687 (1843000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5628 3 episodes - episode_reward: -460.116 [-804.520, -176.425] - loss: 6592.976 - mae: 442.074 - mean_q: 493.849 Interval 3688 (1843500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.2556 3 episodes - episode_reward: -690.911 [-1001.032, -408.259] - loss: 6204.637 - mae: 413.583 - mean_q: 454.034 Interval 3689 (1844000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.0008 7 episodes - episode_reward: -343.544 [-499.291, -140.530] - loss: 6680.124 - mae: 411.672 - mean_q: 451.011 Interval 3690 (1844500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.1746 5 episodes - episode_reward: -762.238 [-1608.991, -299.327] - loss: 6056.972 - mae: 393.170 - mean_q: 426.804 Interval 3691 (1845000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.7401 5 episodes - episode_reward: -467.907 [-632.493, -178.723] - loss: 5180.322 - mae: 374.039 - mean_q: 401.196 Interval 3692 (1845500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.4032 4 episodes - episode_reward: -584.949 [-810.142, -133.188] - loss: 5483.633 - mae: 352.735 - mean_q: 372.517 Interval 3693 (1846000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.3447 4 
episodes - episode_reward: -653.089 [-773.542, -499.263] - loss: 5200.154 - mae: 349.646 - mean_q: 367.723 Interval 3694 (1846500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.9128 4 episodes - episode_reward: -422.631 [-725.306, -144.822] - loss: 5527.760 - mae: 339.977 - mean_q: 352.635 Interval 3695 (1847000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.0069 4 episodes - episode_reward: -920.344 [-2167.466, -470.636] - loss: 4888.907 - mae: 316.861 - mean_q: 321.890 Interval 3696 (1847500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.9795 6 episodes - episode_reward: -585.324 [-1039.148, -115.006] - loss: 4718.453 - mae: 314.190 - mean_q: 316.851 Interval 3697 (1848000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2224 6 episodes - episode_reward: -344.536 [-639.553, -100.000] - loss: 4529.963 - mae: 302.291 - mean_q: 299.672 Interval 3698 (1848500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.9701 5 episodes - episode_reward: -501.280 [-730.803, -332.464] - loss: 4055.199 - mae: 283.169 - mean_q: 271.905 Interval 3699 (1849000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.7509 8 episodes - episode_reward: -324.149 [-806.430, -100.000] - loss: 3861.031 - mae: 274.632 - mean_q: 259.502 Interval 3700 (1849500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.5383 4 episodes - episode_reward: -455.506 [-526.254, -311.222] - loss: 4076.241 - mae: 272.176 - mean_q: 256.403 Interval 3701 (1850000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.9371 5 episodes - episode_reward: -742.459 [-1543.778, -193.388] - loss: 3588.091 - mae: 264.351 - mean_q: 244.927 Interval 3702 (1850500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.0398 
6 episodes - episode_reward: -405.291 [-576.925, -100.000] - loss: 4030.000 - mae: 248.092 - mean_q: 219.787 Interval 3703 (1851000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.3765 6 episodes - episode_reward: -462.889 [-730.718, -197.876] - loss: 3449.119 - mae: 235.226 - mean_q: 202.375 Interval 3704 (1851500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.1101 5 episodes - episode_reward: -468.230 [-547.201, -373.970] - loss: 3304.155 - mae: 225.098 - mean_q: 185.948 Interval 3705 (1852000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.2923 5 episodes - episode_reward: -564.839 [-754.420, -222.973] - loss: 3195.838 - mae: 224.647 - mean_q: 186.085 Interval 3706 (1852500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.2182 5 episodes - episode_reward: -497.468 [-655.562, -383.139] - loss: 3244.433 - mae: 209.386 - mean_q: 162.440 Interval 3707 (1853000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.2302 5 episodes - episode_reward: -467.105 [-771.570, -278.447] - loss: 3205.099 - mae: 209.369 - mean_q: 160.080 Interval 3708 (1853500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.0346 5 episodes - episode_reward: -593.961 [-1355.272, -100.000] - loss: 2891.386 - mae: 196.438 - mean_q: 141.198 Interval 3709 (1854000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.7892 4 episodes - episode_reward: -437.187 [-632.935, -211.057] - loss: 2668.046 - mae: 185.466 - mean_q: 125.773 Interval 3710 (1854500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.5270 7 episodes - episode_reward: -478.894 [-985.381, -100.000] - loss: 2772.747 - mae: 178.514 - mean_q: 114.481 Interval 3711 (1855000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.4541 
4 episodes - episode_reward: -694.579 [-1102.060, -483.152] - loss: 2407.160 - mae: 175.634 - mean_q: 109.868 Interval 3712 (1855500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.6342 6 episodes - episode_reward: -568.277 [-628.188, -504.041] - loss: 2311.574 - mae: 169.051 - mean_q: 100.604 Interval 3713 (1856000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8369 4 episodes - episode_reward: -649.403 [-1031.353, -439.221] - loss: 2234.446 - mae: 167.521 - mean_q: 99.332 Interval 3714 (1856500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.5950 7 episodes - episode_reward: -431.145 [-919.617, -100.000] - loss: 2107.412 - mae: 167.338 - mean_q: 98.612 Interval 3715 (1857000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.6439 5 episodes - episode_reward: -550.828 [-927.468, -303.263] - loss: 1955.278 - mae: 160.508 - mean_q: 87.615 Interval 3716 (1857500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.6280 6 episodes - episode_reward: -430.526 [-727.678, -130.485] - loss: 1911.009 - mae: 154.679 - mean_q: 79.415 Interval 3717 (1858000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8470 7 episodes - episode_reward: -455.277 [-800.635, -158.206] - loss: 1757.370 - mae: 154.038 - mean_q: 77.428 Interval 3718 (1858500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0679 4 episodes - episode_reward: -762.988 [-1111.433, -558.295] - loss: 1832.273 - mae: 148.242 - mean_q: 68.438 Interval 3719 (1859000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.0039 8 episodes - episode_reward: -374.278 [-787.786, -171.597] - loss: 1860.113 - mae: 143.521 - mean_q: 63.266 Interval 3720 (1859500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.6430 6 
episodes - episode_reward: -543.560 [-821.583, -100.000] - loss: 1522.044 - mae: 147.097 - mean_q: 68.677 Interval 3721 (1860000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5823 6 episodes - episode_reward: -435.323 [-706.516, -114.467] - loss: 1754.219 - mae: 143.505 - mean_q: 62.662 Interval 3722 (1860500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.4076 6 episodes - episode_reward: -572.808 [-676.900, -411.011] - loss: 1476.773 - mae: 140.091 - mean_q: 57.155 Interval 3723 (1861000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3374 5 episodes - episode_reward: -519.599 [-777.450, -165.958] - loss: 1663.486 - mae: 137.954 - mean_q: 52.692 Interval 3724 (1861500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7387 6 episodes - episode_reward: -410.464 [-579.611, -210.346] - loss: 1240.185 - mae: 138.705 - mean_q: 54.914 Interval 3725 (1862000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0517 5 episodes - episode_reward: -565.832 [-866.571, -140.368] - loss: 1508.034 - mae: 137.319 - mean_q: 51.722 Interval 3726 (1862500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7703 8 episodes - episode_reward: -369.814 [-662.997, -157.912] - loss: 1536.650 - mae: 133.872 - mean_q: 45.327 Interval 3727 (1863000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7128 7 episodes - episode_reward: -427.856 [-767.753, -182.021] - loss: 1323.445 - mae: 131.298 - mean_q: 40.967 Interval 3728 (1863500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6985 5 episodes - episode_reward: -373.204 [-598.074, -114.334] - loss: 1289.555 - mae: 126.460 - mean_q: 32.969 Interval 3729 (1864000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4038 5 episodes - 
episode_reward: -596.160 [-796.388, -359.918] - loss: 1270.256 - mae: 127.940 - mean_q: 34.160 Interval 3730 (1864500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4507 5 episodes - episode_reward: -415.706 [-531.102, -174.634] - loss: 1063.074 - mae: 125.833 - mean_q: 31.208 Interval 3731 (1865000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7525 3 episodes - episode_reward: -996.579 [-1263.433, -831.381] - loss: 987.013 - mae: 122.899 - mean_q: 25.369 Interval 3732 (1865500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.2026 7 episodes - episode_reward: -473.704 [-766.016, -201.214] - loss: 1054.241 - mae: 124.186 - mean_q: 26.487 Interval 3733 (1866000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5383 4 episodes - episode_reward: -676.909 [-1198.321, -470.731] - loss: 1105.131 - mae: 123.015 - mean_q: 24.309 Interval 3734 (1866500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.1518 7 episodes - episode_reward: -449.908 [-567.716, -366.628] - loss: 1314.371 - mae: 123.124 - mean_q: 23.623 Interval 3735 (1867000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2875 5 episodes - episode_reward: -532.385 [-691.736, -367.737] - loss: 1062.245 - mae: 122.947 - mean_q: 23.085 Interval 3736 (1867500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7348 4 episodes - episode_reward: -458.916 [-681.509, -315.416] - loss: 1085.422 - mae: 126.641 - mean_q: 28.108 Interval 3737 (1868000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5279 5 episodes - episode_reward: -496.570 [-658.150, -211.005] - loss: 1230.735 - mae: 129.386 - mean_q: 31.820 Interval 3738 (1868500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6323 5 episodes - 
episode_reward: -521.379 [-1023.856, -237.024] - loss: 1068.970 - mae: 131.904 - mean_q: 36.020 Interval 3739 (1869000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1991 4 episodes - episode_reward: -635.434 [-1180.401, -240.329] - loss: 1171.362 - mae: 134.662 - mean_q: 39.901 Interval 3740 (1869500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1834 4 episodes - episode_reward: -546.033 [-684.222, -244.817] - loss: 1388.733 - mae: 132.890 - mean_q: 36.694 Interval 3741 (1870000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4092 5 episodes - episode_reward: -567.290 [-650.573, -475.199] - loss: 1320.423 - mae: 126.169 - mean_q: 26.792 Interval 3742 (1870500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5332 9 episodes - episode_reward: -282.063 [-677.776, -121.384] - loss: 1185.445 - mae: 128.383 - mean_q: 30.348 Interval 3743 (1871000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2288 5 episodes - episode_reward: -531.649 [-873.417, -314.845] - loss: 1279.855 - mae: 131.704 - mean_q: 34.612 Interval 3744 (1871500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0663 7 episodes - episode_reward: -508.441 [-701.339, -209.036] - loss: 1216.533 - mae: 132.486 - mean_q: 35.691 Interval 3745 (1872000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9840 5 episodes - episode_reward: -389.845 [-594.217, -156.051] - loss: 1301.327 - mae: 131.868 - mean_q: 34.123 Interval 3746 (1872500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8079 5 episodes - episode_reward: -669.196 [-1508.870, -206.149] - loss: 1374.743 - mae: 128.574 - mean_q: 29.293 Interval 3747 (1873000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5292 3 episodes - 
episode_reward: -673.714 [-941.289, -465.435] - loss: 1707.736 - mae: 138.076 - mean_q: 42.478 Interval 3748 (1873500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2030 7 episodes - episode_reward: -420.239 [-635.327, -228.838] - loss: 1681.724 - mae: 150.149 - mean_q: 59.025 Interval 3749 (1874000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8948 7 episodes - episode_reward: -408.558 [-782.904, -100.000] - loss: 1768.138 - mae: 141.167 - mean_q: 44.109 Interval 3750 (1874500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2766 7 episodes - episode_reward: -380.283 [-665.812, -41.175] - loss: 2048.145 - mae: 147.849 - mean_q: 53.585 Interval 3751 (1875000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.1752 6 episodes - episode_reward: -358.991 [-593.630, -234.724] - loss: 2513.210 - mae: 155.198 - mean_q: 63.667 Interval 3752 (1875500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7891 5 episodes - episode_reward: -485.320 [-646.652, -333.411] - loss: 2610.949 - mae: 157.368 - mean_q: 66.977 Interval 3753 (1876000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5058 8 episodes - episode_reward: -276.442 [-503.388, -60.448] - loss: 3385.926 - mae: 170.358 - mean_q: 84.421 Interval 3754 (1876500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.2046 6 episodes - episode_reward: -507.726 [-1092.087, -227.475] - loss: 3236.341 - mae: 171.075 - mean_q: 84.744 Interval 3755 (1877000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9654 4 episodes - episode_reward: -510.387 [-857.626, -191.800] - loss: 3930.485 - mae: 184.581 - mean_q: 102.991 Interval 3756 (1877500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6338 6 episodes - 
episode_reward: -446.804 [-677.393, -273.045] - loss: 4368.057 - mae: 190.387 - mean_q: 110.758 Interval 3757 (1878000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9201 5 episodes - episode_reward: -569.508 [-1060.245, -178.477] - loss: 4250.054 - mae: 197.583 - mean_q: 120.835 Interval 3758 (1878500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0166 7 episodes - episode_reward: -461.770 [-665.557, -151.462] - loss: 4536.978 - mae: 197.654 - mean_q: 120.453 Interval 3759 (1879000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0994 6 episodes - episode_reward: -414.766 [-871.081, -116.773] - loss: 5846.572 - mae: 222.721 - mean_q: 155.886 Interval 3760 (1879500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5264 5 episodes - episode_reward: -474.734 [-1077.714, -145.987] - loss: 5782.789 - mae: 226.736 - mean_q: 162.475 Interval 3761 (1880000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0642 6 episodes - episode_reward: -554.971 [-1133.890, -178.643] - loss: 7225.331 - mae: 237.222 - mean_q: 176.586 Interval 3762 (1880500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1582 4 episodes - episode_reward: -691.698 [-1399.025, -264.390] - loss: 7327.044 - mae: 235.721 - mean_q: 173.389 Interval 3763 (1881000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.6966 7 episodes - episode_reward: -476.573 [-614.259, -169.615] - loss: 8367.491 - mae: 244.737 - mean_q: 183.993 Interval 3764 (1881500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4301 3 episodes - episode_reward: -461.135 [-603.371, -297.092] - loss: 8703.058 - mae: 254.379 - mean_q: 197.291 Interval 3765 (1882000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1943 6 episodes 
- episode_reward: -647.351 [-1716.821, -203.592] - loss: 9427.301 - mae: 255.234 - mean_q: 196.901 Interval 3766 (1882500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3295 5 episodes - episode_reward: -403.307 [-705.384, -126.889] - loss: 11375.935 - mae: 249.374 - mean_q: 187.658 Interval 3767 (1883000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9861 6 episodes - episode_reward: -533.519 [-902.419, -302.035] - loss: 11674.330 - mae: 239.490 - mean_q: 174.477 Interval 3768 (1883500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0723 6 episodes - episode_reward: -504.895 [-913.520, -171.857] - loss: 9504.493 - mae: 235.348 - mean_q: 169.488 Interval 3769 (1884000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.3233 7 episodes - episode_reward: -368.014 [-761.647, -100.000] - loss: 9048.296 - mae: 217.925 - mean_q: 145.389 Interval 3770 (1884500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6008 4 episodes - episode_reward: -836.184 [-1763.750, -161.454] - loss: 10397.865 - mae: 221.494 - mean_q: 149.985 Interval 3771 (1885000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8260 4 episodes - episode_reward: -480.646 [-1072.863, 2.600] - loss: 8657.116 - mae: 244.592 - mean_q: 184.203 Interval 3772 (1885500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5801 4 episodes - episode_reward: -327.714 [-528.446, -186.878] - loss: 11226.645 - mae: 271.500 - mean_q: 220.689 Interval 3773 (1886000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6456 4 episodes - episode_reward: -541.877 [-951.272, -236.700] - loss: 12770.724 - mae: 315.533 - mean_q: 283.780 Interval 3774 (1886500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.9701 9 
episodes - episode_reward: -406.015 [-1003.891, -100.000] - loss: 13345.576 - mae: 334.788 - mean_q: 311.106 Interval 3775 (1887000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.6869 8 episodes - episode_reward: -418.822 [-887.128, -191.906] - loss: 14402.288 - mae: 360.866 - mean_q: 348.178 Interval 3776 (1887500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9564 5 episodes - episode_reward: -400.776 [-823.883, -100.000] - loss: 15941.528 - mae: 378.222 - mean_q: 372.277 Interval 3777 (1888000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9718 4 episodes - episode_reward: -108.284 [-161.765, -55.547] - loss: 14707.179 - mae: 404.209 - mean_q: 409.002 Interval 3778 (1888500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7701 5 episodes - episode_reward: -389.887 [-772.407, -143.621] - loss: 15839.476 - mae: 422.195 - mean_q: 434.546 Interval 3779 (1889000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1292 Interval 3780 (1889500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7279 5 episodes - episode_reward: -181.589 [-246.805, -121.553] - loss: 16979.598 - mae: 513.071 - mean_q: 562.981 Interval 3781 (1890000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4084 2 episodes - episode_reward: -328.249 [-492.823, -163.675] - loss: 17598.400 - mae: 590.130 - mean_q: 670.323 Interval 3782 (1890500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.1624 8 episodes - episode_reward: -452.219 [-1105.185, -177.987] - loss: 17289.699 - mae: 654.153 - mean_q: 764.157 Interval 3783 (1891000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.7376 1 episodes - episode_reward: -357.348 [-357.348, -357.348] - loss: 16770.900 - mae: 681.143 - mean_q: 
804.786 Interval 3784 (1891500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1312 6 episodes - episode_reward: -331.740 [-446.028, -104.268] - loss: 18769.631 - mae: 720.377 - mean_q: 862.601 Interval 3785 (1892000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.6126 7 episodes - episode_reward: -485.626 [-790.575, -372.733] - loss: 22050.490 - mae: 816.800 - mean_q: 995.958 Interval 3786 (1892500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2178 3 episodes - episode_reward: -696.923 [-865.042, -430.007] - loss: 24075.010 - mae: 874.575 - mean_q: 1072.838 Interval 3787 (1893000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1909 Interval 3788 (1893500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1305 Interval 3789 (1894000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0618 5 episodes - episode_reward: -335.792 [-531.774, -99.717] - loss: 23119.504 - mae: 1060.015 - mean_q: 1328.628 Interval 3790 (1894500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1500 Interval 3791 (1895000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2719 2 episodes - episode_reward: -369.862 [-374.932, -364.793] - loss: 19684.283 - mae: 1125.383 - mean_q: 1416.688 Interval 3792 (1895500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0098 3 episodes - episode_reward: -329.899 [-513.454, -97.196] - loss: 20714.080 - mae: 1138.871 - mean_q: 1437.592 Interval 3793 (1896000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1629 Interval 3794 (1896500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1797 Interval 3795 (1897000 steps performed) 500/500 [==============================] - 
3s 7ms/step - reward: -0.1893 Interval 3796 (1897500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5701 2 episodes - episode_reward: -752.801 [-1354.276, -151.326] - loss: 22562.857 - mae: 1382.704 - mean_q: 1759.654 Interval 3797 (1898000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1364 Interval 3798 (1898500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1552 Interval 3799 (1899000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1930 Interval 3800 (1899500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1691 Interval 3801 (1900000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1818 Interval 3802 (1900500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1940 Interval 3803 (1901000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1824 Interval 3804 (1901500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1643 Interval 3805 (1902000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1755 Interval 3806 (1902500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1401 Interval 3807 (1903000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2468 Interval 3808 (1903500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1129 Interval 3809 (1904000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2212 Interval 3810 (1904500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.1657 Interval 3811 (1905000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.1680 Interval 3812 (1905500 steps performed) 
500/500 [==============================] - 7s 14ms/step - reward: -0.1641 Interval 3813 (1906000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2053 Interval 3814 (1906500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1748 Interval 3815 (1907000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.1564 Interval 3816 (1907500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1901 Interval 3817 (1908000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.2250 Interval 3818 (1908500 steps performed) 500/500 [==============================] - 9s 17ms/step - reward: -0.2241 Interval 3819 (1909000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7436 1 episodes - episode_reward: -2321.950 [-2321.950, -2321.950] - loss: 14000.013 - mae: 2007.636 - mean_q: 2621.508 Interval 3820 (1909500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2826 1 episodes - episode_reward: -187.264 [-187.264, -187.264] - loss: 14170.852 - mae: 1968.990 - mean_q: 2570.843 Interval 3821 (1910000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1360 Interval 3822 (1910500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0041 1 episodes - episode_reward: -586.099 [-586.099, -586.099] - loss: 18095.084 - mae: 2006.545 - mean_q: 2622.175 Interval 3823 (1911000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5462 4 episodes - episode_reward: -319.175 [-464.856, -173.934] - loss: 17688.666 - mae: 2030.253 - mean_q: 2658.226 Interval 3824 (1911500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2140 Interval 3825 (1912000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7707 
4 episodes - episode_reward: -360.189 [-479.053, -261.218] - loss: 14674.404 - mae: 2078.187 - mean_q: 2723.372 Interval 3826 (1912500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.2040 11 episodes - episode_reward: -357.966 [-626.473, -100.000] - loss: 16458.014 - mae: 2048.602 - mean_q: 2678.242 Interval 3827 (1913000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.1529 2 episodes - episode_reward: -348.259 [-430.458, -266.060] - loss: 18174.783 - mae: 2046.768 - mean_q: 2678.438 Interval 3828 (1913500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3003 3 episodes - episode_reward: -413.837 [-484.472, -347.932] - loss: 15447.075 - mae: 2029.445 - mean_q: 2658.649 Interval 3829 (1914000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6272 4 episodes - episode_reward: -277.169 [-420.090, -172.657] - loss: 15573.330 - mae: 2055.627 - mean_q: 2695.564 Interval 3830 (1914500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2608 5 episodes - episode_reward: -328.940 [-573.022, 2.118] - loss: 14083.299 - mae: 1990.099 - mean_q: 2614.989 Interval 3831 (1915000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5276 1 episodes - episode_reward: -316.210 [-316.210, -316.210] - loss: 14016.509 - mae: 1981.504 - mean_q: 2601.777 Interval 3832 (1915500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.8534 7 episodes - episode_reward: -365.860 [-498.241, -243.622] - loss: 15013.912 - mae: 1946.744 - mean_q: 2555.121 Interval 3833 (1916000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.9477 9 episodes - episode_reward: -328.211 [-485.796, -133.864] - loss: 13099.278 - mae: 1918.921 - mean_q: 2518.145 Interval 3834 (1916500 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: -2.8032 3 episodes - episode_reward: -610.804 [-1091.080, -230.669] - loss: 13437.418 - mae: 1905.074 - mean_q: 2499.780 Interval 3835 (1917000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1879 Interval 3836 (1917500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2089 Interval 3837 (1918000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1778 Interval 3838 (1918500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1788 Interval 3839 (1919000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.2113 Interval 3840 (1919500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1773 Interval 3841 (1920000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1487 Interval 3842 (1920500 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1113 Interval 3843 (1921000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7756 1 episodes - episode_reward: -1144.504 [-1144.504, -1144.504] - loss: 11506.281 - mae: 1712.947 - mean_q: 2240.981 Interval 3844 (1921500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2611 Interval 3845 (1922000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1965 Interval 3846 (1922500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1480 Interval 3847 (1923000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.1702 Interval 3848 (1923500 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.2071 Interval 3849 (1924000 steps performed) 500/500 [==============================] - 9s 17ms/step - reward: -0.1813 Interval 3850 (1924500 steps performed) 
500/500 [==============================] - 10s 19ms/step - reward: -0.2030 Interval 3851 (1925000 steps performed) 500/500 [==============================] - 11s 21ms/step - reward: -0.1996 Interval 3852 (1925500 steps performed) 500/500 [==============================] - 11s 23ms/step - reward: -0.1673 Interval 3853 (1926000 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -0.1500 Interval 3854 (1926500 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.2166 Interval 3855 (1927000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.1718 Interval 3856 (1927500 steps performed) 500/500 [==============================] - 16s 31ms/step - reward: -0.1609 Interval 3857 (1928000 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -0.2107 Interval 3858 (1928500 steps performed) 500/500 [==============================] - 17s 34ms/step - reward: -0.1790 Interval 3859 (1929000 steps performed) 500/500 [==============================] - 17s 35ms/step - reward: -0.2313 Interval 3860 (1929500 steps performed) 500/500 [==============================] - 18s 36ms/step - reward: -0.1332 Interval 3861 (1930000 steps performed) 500/500 [==============================] - 18s 37ms/step - reward: -0.2604 Interval 3862 (1930500 steps performed) 500/500 [==============================] - 19s 38ms/step - reward: -0.1728 Interval 3863 (1931000 steps performed) 500/500 [==============================] - 20s 39ms/step - reward: -0.1567 Interval 3864 (1931500 steps performed) 500/500 [==============================] - 20s 40ms/step - reward: -0.2150 Interval 3865 (1932000 steps performed) 500/500 [==============================] - 21s 41ms/step - reward: -0.1398 Interval 3866 (1932500 steps performed) 500/500 [==============================] - 22s 43ms/step - reward: -0.1870 Interval 3867 (1933000 steps performed) 500/500 [==============================] - 
23s 45ms/step - reward: -0.1873 Interval 3868 (1933500 steps performed) 500/500 [==============================] - 24s 47ms/step - reward: -0.1637 Interval 3869 (1934000 steps performed) 500/500 [==============================] - 24s 47ms/step - reward: -0.1873 Interval 3870 (1934500 steps performed) 500/500 [==============================] - 26s 53ms/step - reward: -0.2208 Interval 3871 (1935000 steps performed) 500/500 [==============================] - 26s 52ms/step - reward: -0.1747 Interval 3872 (1935500 steps performed) 500/500 [==============================] - 27s 54ms/step - reward: -0.1774 Interval 3873 (1936000 steps performed) 500/500 [==============================] - 27s 54ms/step - reward: -0.1857 Interval 3874 (1936500 steps performed) 500/500 [==============================] - 27s 55ms/step - reward: -0.2118 Interval 3875 (1937000 steps performed) 500/500 [==============================] - 28s 56ms/step - reward: -0.1908 Interval 3876 (1937500 steps performed) 500/500 [==============================] - 29s 59ms/step - reward: -0.1784 Interval 3877 (1938000 steps performed) 500/500 [==============================] - 28s 57ms/step - reward: -0.1673 Interval 3878 (1938500 steps performed) 500/500 [==============================] - 28s 57ms/step - reward: -0.1762 Interval 3879 (1939000 steps performed) 500/500 [==============================] - 29s 57ms/step - reward: -0.1945 Interval 3880 (1939500 steps performed) 500/500 [==============================] - 29s 58ms/step - reward: -0.1195 Interval 3881 (1940000 steps performed) 500/500 [==============================] - 29s 58ms/step - reward: -0.2412 Interval 3882 (1940500 steps performed) 500/500 [==============================] - 29s 58ms/step - reward: -0.1264 Interval 3883 (1941000 steps performed) 500/500 [==============================] - 30s 59ms/step - reward: -0.1779 Interval 3884 (1941500 steps performed) 500/500 [==============================] - 30s 60ms/step - reward: -0.1724 Interval 
3885 (1942000 steps performed) 500/500 [==============================] - 29s 59ms/step - reward: -0.1373 Interval 3886 (1942500 steps performed) 500/500 [==============================] - 30s 61ms/step - reward: -0.2692 Interval 3887 (1943000 steps performed) 500/500 [==============================] - 29s 57ms/step - reward: -1.5974 2 episodes - episode_reward: -2398.739 [-4697.045, -100.434] - loss: 1123.291 - mae: 1063.071 - mean_q: 1412.105 Interval 3888 (1943500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.5881 8 episodes - episode_reward: -479.727 [-862.788, -257.209] - loss: 1193.264 - mae: 1015.758 - mean_q: 1347.066 Interval 3889 (1944000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8103 4 episodes - episode_reward: -715.651 [-1387.913, -414.750] - loss: 1333.167 - mae: 992.699 - mean_q: 1315.113 Interval 3890 (1944500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3066 4 episodes - episode_reward: -622.860 [-1053.012, -368.593] - loss: 1337.109 - mae: 955.299 - mean_q: 1262.190 Interval 3891 (1945000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.5522 7 episodes - episode_reward: -492.739 [-640.739, -325.289] - loss: 1166.281 - mae: 922.486 - mean_q: 1217.204 Interval 3892 (1945500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.6110 8 episodes - episode_reward: -499.394 [-665.275, -304.107] - loss: 1046.758 - mae: 895.998 - mean_q: 1180.409 Interval 3893 (1946000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.2104 14 episodes - episode_reward: -313.704 [-560.789, -100.000] - loss: 962.544 - mae: 858.031 - mean_q: 1128.947 Interval 3894 (1946500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0789 5 episodes - episode_reward: -514.421 [-836.088, -244.251] - loss: 967.245 - mae: 820.447 - 
mean_q: 1075.940 Interval 3895 (1947000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2803 5 episodes - episode_reward: -510.034 [-693.367, -325.480] - loss: 879.586 - mae: 789.675 - mean_q: 1031.988 Interval 3896 (1947500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.7891 8 episodes - episode_reward: -493.196 [-774.542, -226.409] - loss: 861.999 - mae: 756.305 - mean_q: 986.578 Interval 3897 (1948000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4124 4 episodes - episode_reward: -637.689 [-965.382, -190.236] - loss: 752.439 - mae: 722.916 - mean_q: 941.731 Interval 3898 (1948500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.1679 8 episodes - episode_reward: -514.702 [-1006.457, -277.600] - loss: 795.719 - mae: 696.417 - mean_q: 904.454 Interval 3899 (1949000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6475 4 episodes - episode_reward: -612.643 [-1001.431, -344.503] - loss: 772.177 - mae: 666.910 - mean_q: 861.792 Interval 3900 (1949500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.8253 5 episodes - episode_reward: -524.079 [-1213.439, -100.000] - loss: 773.071 - mae: 640.354 - mean_q: 825.724 Interval 3901 (1950000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.6559 12 episodes - episode_reward: -407.813 [-1020.113, -100.000] - loss: 723.926 - mae: 607.177 - mean_q: 779.191 Interval 3902 (1950500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8580 7 episodes - episode_reward: -433.264 [-516.865, -379.322] - loss: 675.601 - mae: 576.324 - mean_q: 735.486 Interval 3903 (1951000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.8844 9 episodes - episode_reward: -510.595 [-1171.118, -199.126] - loss: 662.861 - mae: 551.982 - 
mean_q: 701.612 Interval 3904 (1951500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -9.1344 8 episodes - episode_reward: -571.311 [-1061.193, -369.546] - loss: 621.207 - mae: 519.976 - mean_q: 656.848 Interval 3905 (1952000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.7577 10 episodes - episode_reward: -339.956 [-671.073, -100.000] - loss: 620.836 - mae: 491.098 - mean_q: 615.561 Interval 3906 (1952500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.2203 8 episodes - episode_reward: -511.822 [-909.331, -136.480] - loss: 609.073 - mae: 467.555 - mean_q: 584.881 Interval 3907 (1953000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.6889 6 episodes - episode_reward: -555.024 [-1101.808, -350.628] - loss: 614.828 - mae: 448.660 - mean_q: 557.529 Interval 3908 (1953500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2105 7 episodes - episode_reward: -445.106 [-659.385, -107.521] - loss: 603.101 - mae: 430.294 - mean_q: 531.076 Interval 3909 (1954000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.5958 8 episodes - episode_reward: -481.752 [-899.697, -297.925] - loss: 556.076 - mae: 411.743 - mean_q: 504.759 Interval 3910 (1954500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.9390 8 episodes - episode_reward: -550.651 [-1009.217, -100.000] - loss: 511.853 - mae: 393.233 - mean_q: 478.258 Interval 3911 (1955000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -9.0083 8 episodes - episode_reward: -564.537 [-1133.000, -135.235] - loss: 508.363 - mae: 377.040 - mean_q: 455.746 Interval 3912 (1955500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.0462 7 episodes - episode_reward: -482.822 [-822.318, -366.506] - loss: 502.918 - mae: 360.969 - 
mean_q: 431.588 Interval 3913 (1956000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.7469 7 episodes - episode_reward: -493.765 [-571.932, -401.331] - loss: 495.480 - mae: 342.362 - mean_q: 406.139 Interval 3914 (1956500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.4876 7 episodes - episode_reward: -529.429 [-870.232, -236.172] - loss: 515.208 - mae: 323.767 - mean_q: 378.397 Interval 3915 (1957000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -9.2977 9 episodes - episode_reward: -529.253 [-999.517, -262.666] - loss: 486.984 - mae: 310.824 - mean_q: 359.952 Interval 3916 (1957500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.0867 10 episodes - episode_reward: -389.789 [-837.388, -100.000] - loss: 456.259 - mae: 298.874 - mean_q: 344.077 Interval 3917 (1958000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.8817 5 episodes - episode_reward: -594.453 [-998.014, -446.129] - loss: 509.681 - mae: 290.336 - mean_q: 329.195 Interval 3918 (1958500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.5510 8 episodes - episode_reward: -539.487 [-1005.689, -241.916] - loss: 448.661 - mae: 285.190 - mean_q: 323.773 Interval 3919 (1959000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.8734 8 episodes - episode_reward: -474.861 [-739.115, -301.581] - loss: 490.835 - mae: 281.399 - mean_q: 317.791 Interval 3920 (1959500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -9.5894 9 episodes - episode_reward: -551.566 [-1133.628, -144.543] - loss: 440.487 - mae: 291.896 - mean_q: 334.985 Interval 3921 (1960000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.4698 7 episodes - episode_reward: -611.453 [-1131.429, -319.259] - loss: 490.388 - mae: 306.464 - 
mean_q: 352.861 Interval 3922 (1960500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.7994 7 episodes - episode_reward: -523.156 [-836.301, -100.000] - loss: 449.355 - mae: 322.084 - mean_q: 372.671 Interval 3923 (1961000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.9588 8 episodes - episode_reward: -631.800 [-1048.634, -265.850] - loss: 443.009 - mae: 329.204 - mean_q: 378.870 Interval 3924 (1961500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.3406 8 episodes - episode_reward: -540.178 [-843.201, -132.537] - loss: 402.298 - mae: 325.882 - mean_q: 373.766 Interval 3925 (1962000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.4538 7 episodes - episode_reward: -492.721 [-950.287, -169.253] - loss: 481.250 - mae: 333.235 - mean_q: 382.792 Interval 3926 (1962500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.8994 8 episodes - episode_reward: -528.739 [-1075.812, -100.000] - loss: 440.372 - mae: 330.675 - mean_q: 377.715 Interval 3927 (1963000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.2632 6 episodes - episode_reward: -591.321 [-1204.999, -175.726] - loss: 457.363 - mae: 320.441 - mean_q: 362.945 Interval 3928 (1963500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -9.3251 8 episodes - episode_reward: -650.965 [-1112.165, -288.687] - loss: 422.312 - mae: 326.518 - mean_q: 373.913 Interval 3929 (1964000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.7914 7 episodes - episode_reward: -571.797 [-1074.887, -293.877] - loss: 336.418 - mae: 340.326 - mean_q: 391.935 Interval 3930 (1964500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.5055 8 episodes - episode_reward: -522.033 [-788.291, -173.621] - loss: 407.490 - mae: 357.049 - 
mean_q: 413.289 Interval 3931 (1965000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -9.2136 7 episodes - episode_reward: -653.541 [-937.442, -389.582] - loss: 511.799 - mae: 387.953 - mean_q: 456.246 Interval 3932 (1965500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.4276 8 episodes - episode_reward: -536.930 [-798.341, -240.636] - loss: 577.812 - mae: 419.393 - mean_q: 498.267 Interval 3933 (1966000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.5004 8 episodes - episode_reward: -521.662 [-992.756, -32.596] - loss: 668.339 - mae: 478.421 - mean_q: 581.861 Interval 3934 (1966500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.6289 8 episodes - episode_reward: -429.438 [-735.357, -104.376] - loss: 729.976 - mae: 531.969 - mean_q: 651.036 Interval 3935 (1967000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.8930 7 episodes - episode_reward: -682.340 [-915.158, -487.919] - loss: 975.380 - mae: 591.078 - mean_q: 728.044 Interval 3936 (1967500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.3589 9 episodes - episode_reward: -470.710 [-673.602, -215.672] - loss: 1016.146 - mae: 630.089 - mean_q: 774.841 Interval 3937 (1968000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.1042 8 episodes - episode_reward: -463.065 [-731.336, -114.793] - loss: 1191.930 - mae: 680.989 - mean_q: 842.320 Interval 3938 (1968500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.4086 8 episodes - episode_reward: -508.453 [-1013.252, -146.612] - loss: 1427.824 - mae: 715.041 - mean_q: 884.781 Interval 3939 (1969000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.4525 7 episodes - episode_reward: -524.717 [-1021.065, -181.661] - loss: 1652.905 - mae: 749.676 - 
mean_q: 928.986 Interval 3940 (1969500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.7620 9 episodes - episode_reward: -474.237 [-884.988, -138.980] - loss: 1519.732 - mae: 766.800 - mean_q: 950.871 Interval 3941 (1970000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.4412 7 episodes - episode_reward: -611.457 [-748.537, -396.458] - loss: 1778.646 - mae: 775.067 - mean_q: 961.011 Interval 3942 (1970500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0239 5 episodes - episode_reward: -504.377 [-667.773, -291.000] - loss: 2011.572 - mae: 807.203 - mean_q: 1004.171 Interval 3943 (1971000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.7319 5 episodes - episode_reward: -557.845 [-1417.727, -151.786] - loss: 2053.842 - mae: 827.683 - mean_q: 1028.287 Interval 3944 (1971500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9279 5 episodes - episode_reward: -592.788 [-1006.786, -97.493] - loss: 1837.814 - mae: 863.161 - mean_q: 1074.119 Interval 3945 (1972000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8535 7 episodes - episode_reward: -416.914 [-863.804, -100.000] - loss: 1957.637 - mae: 857.523 - mean_q: 1064.865 Interval 3946 (1972500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4672 5 episodes - episode_reward: -429.990 [-598.240, -122.476] - loss: 1812.254 - mae: 861.328 - mean_q: 1067.855 Interval 3947 (1973000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.1051 7 episodes - episode_reward: -461.552 [-848.459, -100.000] - loss: 1951.474 - mae: 858.423 - mean_q: 1063.654 Interval 3948 (1973500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.1364 4 episodes - episode_reward: -533.034 [-618.925, -479.456] - loss: 2169.334 - mae: 
868.107 - mean_q: 1077.039 Interval 3949 (1974000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9244 4 episodes - episode_reward: -626.579 [-800.848, -506.173] - loss: 2327.915 - mae: 892.013 - mean_q: 1107.340 Interval 3950 (1974500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9839 8 episodes - episode_reward: -373.877 [-629.707, -100.000] - loss: 2394.694 - mae: 877.874 - mean_q: 1086.522 Interval 3951 (1975000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3816 6 episodes - episode_reward: -382.135 [-614.047, -205.136] - loss: 2626.264 - mae: 888.805 - mean_q: 1096.144 Interval 3952 (1975500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2925 4 episodes - episode_reward: -707.844 [-814.446, -641.043] - loss: 3425.817 - mae: 890.438 - mean_q: 1097.079 Interval 3953 (1976000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1022 3 episodes - episode_reward: -496.039 [-642.054, -383.424] - loss: 2867.961 - mae: 889.401 - mean_q: 1096.297 Interval 3954 (1976500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.6015 5 episodes - episode_reward: -908.493 [-2318.587, -313.039] - loss: 2732.827 - mae: 863.924 - mean_q: 1058.531 Interval 3955 (1977000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7538 5 episodes - episode_reward: -424.892 [-620.006, -216.926] - loss: 3123.866 - mae: 876.757 - mean_q: 1073.384 Interval 3956 (1977500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7240 4 episodes - episode_reward: -504.898 [-695.377, -390.107] - loss: 3024.672 - mae: 846.371 - mean_q: 1031.185 Interval 3957 (1978000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8306 3 episodes - episode_reward: -996.482 [-1769.471, -446.248] - loss: 
3005.567 - mae: 816.074 - mean_q: 987.825 Interval 3958 (1978500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2312 4 episodes - episode_reward: -609.144 [-1100.372, -202.629] - loss: 2699.280 - mae: 809.929 - mean_q: 980.153 Interval 3959 (1979000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4487 6 episodes - episode_reward: -451.515 [-555.514, -350.409] - loss: 2626.312 - mae: 776.867 - mean_q: 934.057 Interval 3960 (1979500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4003 4 episodes - episode_reward: -469.192 [-660.221, -124.771] - loss: 2303.390 - mae: 726.272 - mean_q: 862.742 Interval 3961 (1980000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9983 2 episodes - episode_reward: -481.709 [-711.908, -251.509] - loss: 2660.691 - mae: 724.757 - mean_q: 862.398 Interval 3962 (1980500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.5540 8 episodes - episode_reward: -360.375 [-619.069, -100.000] - loss: 2671.862 - mae: 702.606 - mean_q: 834.185 Interval 3963 (1981000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2518 5 episodes - episode_reward: -516.930 [-1145.908, -228.629] - loss: 2657.315 - mae: 664.793 - mean_q: 781.720 Interval 3964 (1981500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6807 3 episodes - episode_reward: -456.631 [-597.914, -218.672] - loss: 2501.304 - mae: 671.008 - mean_q: 792.546 Interval 3965 (1982000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6187 6 episodes - episode_reward: -405.845 [-712.080, -146.331] - loss: 2832.956 - mae: 638.506 - mean_q: 748.327 Interval 3966 (1982500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6942 7 episodes - episode_reward: -387.236 [-721.931, -100.000] - loss: 
2945.630 - mae: 621.920 - mean_q: 724.758 Interval 3967 (1983000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.3746 2 episodes - episode_reward: -1287.929 [-1960.759, -615.099] - loss: 3143.636 - mae: 596.070 - mean_q: 687.479 Interval 3968 (1983500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8287 5 episodes - episode_reward: -489.181 [-730.104, -100.000] - loss: 3060.060 - mae: 576.557 - mean_q: 660.486 Interval 3969 (1984000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6551 6 episodes - episode_reward: -399.586 [-681.712, -186.870] - loss: 2950.073 - mae: 564.519 - mean_q: 644.657 Interval 3970 (1984500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0967 2 episodes - episode_reward: -460.054 [-581.054, -339.054] - loss: 3045.700 - mae: 558.180 - mean_q: 634.880 Interval 3971 (1985000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9713 4 episodes - episode_reward: -530.728 [-698.519, -250.709] - loss: 3464.632 - mae: 526.783 - mean_q: 590.182 Interval 3972 (1985500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7524 7 episodes - episode_reward: -409.592 [-563.370, -100.000] - loss: 3683.673 - mae: 514.728 - mean_q: 572.525 Interval 3973 (1986000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0606 6 episodes - episode_reward: -444.413 [-636.303, -257.913] - loss: 3695.993 - mae: 491.487 - mean_q: 540.147 Interval 3974 (1986500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9846 7 episodes - episode_reward: -482.770 [-667.702, -329.469] - loss: 3520.756 - mae: 464.537 - mean_q: 500.201 Interval 3975 (1987000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7886 5 episodes - episode_reward: -372.122 [-565.534, -102.775] - loss: 
3925.803 - mae: 464.911 - mean_q: 501.200 Interval 3976 (1987500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3420 6 episodes - episode_reward: -452.813 [-607.810, -100.000] - loss: 3895.464 - mae: 439.583 - mean_q: 464.929 Interval 3977 (1988000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7396 5 episodes - episode_reward: -431.413 [-714.888, -243.492] - loss: 4127.942 - mae: 436.144 - mean_q: 457.692 Interval 3978 (1988500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0813 3 episodes - episode_reward: -760.052 [-876.375, -575.924] - loss: 5018.714 - mae: 432.032 - mean_q: 452.061 Interval 3979 (1989000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7691 7 episodes - episode_reward: -474.054 [-906.923, -166.841] - loss: 5150.926 - mae: 402.171 - mean_q: 408.110 Interval 3980 (1989500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3817 3 episodes - episode_reward: -491.812 [-634.948, -375.990] - loss: 5451.589 - mae: 389.576 - mean_q: 390.848 Interval 3981 (1990000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5403 4 episodes - episode_reward: -485.888 [-906.507, -152.522] - loss: 5186.901 - mae: 368.091 - mean_q: 359.269 Interval 3982 (1990500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9720 5 episodes - episode_reward: -584.325 [-1033.039, -289.746] - loss: 6053.715 - mae: 340.564 - mean_q: 318.317 Interval 3983 (1991000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3997 3 episodes - episode_reward: -634.907 [-790.074, -340.726] - loss: 6388.584 - mae: 328.803 - mean_q: 301.600 Interval 3984 (1991500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3717 5 episodes - episode_reward: -581.901 [-1077.072, -271.462] - loss: 
5148.716 - mae: 297.302 - mean_q: 255.449 Interval 3985 (1992000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3029 4 episodes - episode_reward: -369.499 [-596.886, -69.714] - loss: 5597.052 - mae: 282.115 - mean_q: 232.800 Interval 3986 (1992500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7592 6 episodes - episode_reward: -627.494 [-1500.538, -258.702] - loss: 5968.856 - mae: 268.112 - mean_q: 212.585 Interval 3987 (1993000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0083 4 episodes - episode_reward: -606.168 [-1058.705, -311.643] - loss: 6070.524 - mae: 256.926 - mean_q: 196.117 Interval 3988 (1993500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3429 5 episodes - episode_reward: -519.574 [-698.824, -292.903] - loss: 5528.685 - mae: 249.347 - mean_q: 186.024 Interval 3989 (1994000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4413 5 episodes - episode_reward: -368.164 [-503.537, -234.696] - loss: 6378.475 - mae: 242.593 - mean_q: 176.419 Interval 3990 (1994500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8831 3 episodes - episode_reward: -655.430 [-737.665, -569.779] - loss: 6027.243 - mae: 246.970 - mean_q: 182.725 Interval 3991 (1995000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3081 6 episodes - episode_reward: -417.502 [-648.259, -100.000] - loss: 6980.509 - mae: 250.233 - mean_q: 186.586 Interval 3992 (1995500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3405 6 episodes - episode_reward: -421.779 [-641.373, -183.755] - loss: 8569.073 - mae: 238.181 - mean_q: 168.462 Interval 3993 (1996000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.4389 5 episodes - episode_reward: -685.343 [-1389.563, -343.346] - loss: 
7246.145 - mae: 235.981 - mean_q: 165.320 Interval 3994 (1996500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5015 5 episodes - episode_reward: -567.669 [-768.021, -290.586] - loss: 7430.718 - mae: 232.125 - mean_q: 159.283 Interval 3995 (1997000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6522 7 episodes - episode_reward: -315.156 [-476.579, -100.000] - loss: 8646.909 - mae: 241.049 - mean_q: 171.310 Interval 3996 (1997500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.6698 2 episodes - episode_reward: -1467.916 [-2434.190, -501.642] - loss: 8338.217 - mae: 231.703 - mean_q: 156.810 Interval 3997 (1998000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5967 2 episodes - episode_reward: -756.831 [-1145.500, -368.163] - loss: 6474.231 - mae: 231.959 - mean_q: 158.137 Interval 3998 (1998500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1860 4 episodes - episode_reward: -689.639 [-1037.785, -467.167] - loss: 7191.331 - mae: 222.214 - mean_q: 142.815 Interval 3999 (1999000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7895 3 episodes - episode_reward: -825.259 [-1048.455, -640.700] - loss: 6523.636 - mae: 214.170 - mean_q: 132.035 Interval 4000 (1999500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6841 2 episodes - episode_reward: -672.561 [-785.308, -559.814] - loss: 7238.044 - mae: 226.492 - mean_q: 150.182 Interval 4001 (2000000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3657 2 episodes - episode_reward: -618.581 [-649.235, -587.927] - loss: 6668.797 - mae: 217.360 - mean_q: 137.502 Interval 4002 (2000500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.6455 11 episodes - episode_reward: -256.305 [-422.102, -114.021] - 
loss: 6440.493 - mae: 216.690 - mean_q: 137.432 Interval 4003 (2001000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3313 4 episodes - episode_reward: -416.760 [-743.377, -125.221] - loss: 5994.695 - mae: 213.088 - mean_q: 133.075 Interval 4004 (2001500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4935 4 episodes - episode_reward: -569.264 [-793.644, -188.730] - loss: 6788.900 - mae: 209.694 - mean_q: 128.261 Interval 4005 (2002000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.2333 4 episodes - episode_reward: -943.883 [-2475.165, -261.506] - loss: 6665.636 - mae: 221.780 - mean_q: 145.239 Interval 4006 (2002500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5100 6 episodes - episode_reward: -510.593 [-661.294, -338.631] - loss: 8443.488 - mae: 218.494 - mean_q: 139.720 Interval 4007 (2003000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.1608 6 episodes - episode_reward: -427.195 [-568.205, -242.081] - loss: 6702.332 - mae: 211.646 - mean_q: 130.395 Interval 4008 (2003500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9835 4 episodes - episode_reward: -620.245 [-971.999, -389.103] - loss: 7260.119 - mae: 203.228 - mean_q: 118.873 Interval 4009 (2004000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2620 6 episodes - episode_reward: -442.631 [-596.975, -126.109] - loss: 7993.347 - mae: 201.910 - mean_q: 117.148 Interval 4010 (2004500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9533 3 episodes - episode_reward: -613.178 [-1002.757, -406.597] - loss: 7926.757 - mae: 202.442 - mean_q: 116.663 Interval 4011 (2005000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4341 5 episodes - episode_reward: -663.697 [-883.987, -475.752] - 
loss: 9029.267 - mae: 199.172 - mean_q: 112.428 Interval 4012 (2005500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0758 7 episodes - episode_reward: -360.819 [-966.766, -155.708] - loss: 8565.915 - mae: 208.174 - mean_q: 124.627 Interval 4013 (2006000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7202 6 episodes - episode_reward: -405.453 [-868.312, -130.989] - loss: 8162.346 - mae: 196.765 - mean_q: 109.677 Interval 4014 (2006500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6609 2 episodes - episode_reward: -783.865 [-857.101, -710.629] - loss: 7086.339 - mae: 205.223 - mean_q: 121.653 Interval 4015 (2007000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -5.5852 4 episodes - episode_reward: -760.652 [-1601.441, -164.867] - loss: 7020.403 - mae: 202.875 - mean_q: 118.506 Interval 4016 (2007500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7444 6 episodes - episode_reward: -482.391 [-733.060, -260.149] - loss: 8737.458 - mae: 204.580 - mean_q: 120.292 Interval 4017 (2008000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.4144 3 episodes - episode_reward: -878.487 [-1621.850, -176.320] - loss: 7586.241 - mae: 197.932 - mean_q: 112.008 Interval 4018 (2008500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.5352 4 episodes - episode_reward: -410.907 [-677.586, -213.975] - loss: 8386.723 - mae: 195.551 - mean_q: 110.079 Interval 4019 (2009000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5600 4 episodes - episode_reward: -597.634 [-907.133, -326.525] - loss: 6774.496 - mae: 210.311 - mean_q: 131.737 Interval 4020 (2009500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6768 5 episodes - episode_reward: -480.227 [-936.333, -233.301] - 
loss: 8434.039 - mae: 227.600 - mean_q: 155.497 Interval 4021 (2010000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9907 6 episodes - episode_reward: -324.847 [-703.333, -115.000] - loss: 8391.241 - mae: 224.766 - mean_q: 151.660 Interval 4022 (2010500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8882 3 episodes - episode_reward: -637.233 [-1004.976, -439.208] - loss: 6393.343 - mae: 231.569 - mean_q: 163.166 Interval 4023 (2011000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.7710 4 episodes - episode_reward: -1188.788 [-3552.948, -300.253] - loss: 7895.653 - mae: 230.400 - mean_q: 161.890 Interval 4024 (2011500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5412 6 episodes - episode_reward: -395.313 [-677.741, -100.000] - loss: 7977.330 - mae: 249.247 - mean_q: 189.872 Interval 4025 (2012000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0861 5 episodes - episode_reward: -532.772 [-895.693, -275.690] - loss: 6578.034 - mae: 249.923 - mean_q: 191.207 Interval 4026 (2012500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0632 5 episodes - episode_reward: -501.221 [-695.986, -200.794] - loss: 7131.343 - mae: 273.108 - mean_q: 224.440 Interval 4027 (2013000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3488 3 episodes - episode_reward: -580.984 [-1062.608, -100.000] - loss: 6908.160 - mae: 278.673 - mean_q: 232.096 Interval 4028 (2013500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6528 4 episodes - episode_reward: -848.889 [-1351.250, -488.118] - loss: 7780.457 - mae: 298.661 - mean_q: 260.877 Interval 4029 (2014000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6026 6 episodes - episode_reward: -428.300 [-805.206, 
-209.428] - loss: 7378.499 - mae: 310.119 - mean_q: 273.518 Interval 4030 (2014500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1952 3 episodes - episode_reward: -826.841 [-1459.267, -398.707] - loss: 8319.895 - mae: 310.527 - mean_q: 274.528 Interval 4031 (2015000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3222 4 episodes - episode_reward: -641.602 [-1293.248, -310.875] - loss: 7458.032 - mae: 330.078 - mean_q: 302.490 Interval 4032 (2015500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4314 6 episodes - episode_reward: -487.006 [-766.018, -80.679] - loss: 7940.901 - mae: 332.962 - mean_q: 307.157 Interval 4033 (2016000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1854 5 episodes - episode_reward: -515.020 [-620.831, -334.568] - loss: 7998.422 - mae: 345.439 - mean_q: 322.900 Interval 4034 (2016500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0062 3 episodes - episode_reward: -1006.029 [-2183.265, -407.397] - loss: 9301.597 - mae: 395.094 - mean_q: 393.298 Interval 4035 (2017000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7008 8 episodes - episode_reward: -336.224 [-539.794, -216.792] - loss: 8691.682 - mae: 382.876 - mean_q: 376.269 Interval 4036 (2017500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2312 5 episodes - episode_reward: -371.267 [-576.558, -133.394] - loss: 9540.015 - mae: 419.426 - mean_q: 428.208 Interval 4037 (2018000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.2186 6 episodes - episode_reward: -355.999 [-723.505, -88.673] - loss: 10058.174 - mae: 454.141 - mean_q: 475.302 Interval 4038 (2018500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8471 1 episodes - episode_reward: -849.133 
[-849.133, -849.133] - loss: 11141.632 - mae: 472.803 - mean_q: 500.054 Interval 4039 (2019000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1912 Interval 4040 (2019500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.9967 5 episodes - episode_reward: -535.769 [-997.751, -150.773] - loss: 12080.920 - mae: 634.356 - mean_q: 722.419 Interval 4041 (2020000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6690 2 episodes - episode_reward: -663.652 [-682.868, -644.435] - loss: 11981.432 - mae: 678.408 - mean_q: 783.337 Interval 4042 (2020500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1559 Interval 4043 (2021000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1550 Interval 4044 (2021500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2231 Interval 4045 (2022000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1606 2 episodes - episode_reward: -686.386 [-971.425, -401.347] - loss: 13115.459 - mae: 991.397 - mean_q: 1210.837 Interval 4046 (2022500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1040 Interval 4047 (2023000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1835 Interval 4048 (2023500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2751 Interval 4049 (2024000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.1576 3 episodes - episode_reward: -649.686 [-836.907, -368.748] - loss: 13480.257 - mae: 1229.484 - mean_q: 1537.252 Interval 4050 (2024500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7186 5 episodes - episode_reward: -540.352 [-710.833, -100.000] - loss: 12349.333 - mae: 1233.762 - mean_q: 1544.467 Interval 4051 
(2025000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0663 6 episodes - episode_reward: -514.810 [-803.622, -111.191] - loss: 13729.226 - mae: 1245.295 - mean_q: 1559.880 Interval 4052 (2025500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0826 1 episodes - episode_reward: -667.342 [-667.342, -667.342] - loss: 12568.743 - mae: 1226.436 - mean_q: 1534.270 Interval 4053 (2026000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.8837 3 episodes - episode_reward: -471.476 [-506.956, -423.707] - loss: 12452.923 - mae: 1279.033 - mean_q: 1608.758 Interval 4054 (2026500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3317 1 episodes - episode_reward: -1014.037 [-1014.037, -1014.037] - loss: 12521.491 - mae: 1275.093 - mean_q: 1602.459 Interval 4055 (2027000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1755 Interval 4056 (2027500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2166 Interval 4057 (2028000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2061 Interval 4058 (2028500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.9721 3 episodes - episode_reward: -750.587 [-1259.759, -274.489] - loss: 13151.048 - mae: 1375.409 - mean_q: 1740.334 Interval 4059 (2029000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.5601 2 episodes - episode_reward: -1428.616 [-1697.283, -1159.949] - loss: 13647.700 - mae: 1417.349 - mean_q: 1799.391 Interval 4060 (2029500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.7755 1 episodes - episode_reward: -1669.095 [-1669.095, -1669.095] - loss: 14002.390 - mae: 1406.795 - mean_q: 1788.786 Interval 4061 (2030000 steps performed) 500/500 [==============================] - 2s 
5ms/step - reward: -3.9498 4 episodes - episode_reward: -533.464 [-964.225, -211.620] - loss: 12809.995 - mae: 1377.986 - mean_q: 1750.117 Interval 4062 (2030500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.2604 7 episodes - episode_reward: -451.856 [-1256.556, -100.871] - loss: 13118.271 - mae: 1375.777 - mean_q: 1747.639 Interval 4063 (2031000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8338 4 episodes - episode_reward: -454.980 [-649.559, -130.815] - loss: 12932.806 - mae: 1384.601 - mean_q: 1761.268 Interval 4064 (2031500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.5209 6 episodes - episode_reward: -641.895 [-1080.003, -244.492] - loss: 11508.752 - mae: 1380.261 - mean_q: 1760.040 Interval 4065 (2032000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.2695 4 episodes - episode_reward: -1030.542 [-1649.728, -314.125] - loss: 11817.129 - mae: 1370.907 - mean_q: 1745.244 Interval 4066 (2032500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7027 1 episodes - episode_reward: -1217.811 [-1217.811, -1217.811] - loss: 11756.277 - mae: 1363.603 - mean_q: 1734.433 Interval 4067 (2033000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.1339 8 episodes - episode_reward: -531.729 [-1086.921, -100.000] - loss: 11415.224 - mae: 1359.063 - mean_q: 1726.753 Interval 4068 (2033500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6403 Interval 4069 (2034000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5442 1 episodes - episode_reward: -1461.512 [-1461.512, -1461.512] - loss: 11996.555 - mae: 1334.079 - mean_q: 1691.403 Interval 4070 (2034500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4998 2 episodes - episode_reward: -831.185 
[-1156.253, -506.117] - loss: 11910.998 - mae: 1355.396 - mean_q: 1722.785 Interval 4071 (2035000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9784 4 episodes - episode_reward: -782.689 [-1450.250, -400.414] - loss: 11692.562 - mae: 1340.528 - mean_q: 1701.058 Interval 4072 (2035500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4809 1 episodes - episode_reward: -1311.707 [-1311.707, -1311.707] - loss: 10782.018 - mae: 1356.588 - mean_q: 1723.369 Interval 4073 (2036000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.0481 3 episodes - episode_reward: -1166.564 [-1687.751, -619.376] - loss: 9411.555 - mae: 1378.903 - mean_q: 1754.289 Interval 4074 (2036500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.0695 7 episodes - episode_reward: -507.907 [-911.505, -200.266] - loss: 10666.408 - mae: 1363.167 - mean_q: 1733.094 Interval 4075 (2037000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6487 1 episodes - episode_reward: -1310.985 [-1310.985, -1310.985] - loss: 9936.586 - mae: 1341.765 - mean_q: 1701.153 Interval 4076 (2037500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3716 1 episodes - episode_reward: -1184.431 [-1184.431, -1184.431] - loss: 10771.227 - mae: 1396.800 - mean_q: 1776.747 Interval 4077 (2038000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.7523 5 episodes - episode_reward: -579.791 [-1012.604, -100.000] - loss: 10933.181 - mae: 1420.990 - mean_q: 1810.755 Interval 4078 (2038500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2842 1 episodes - episode_reward: -1122.815 [-1122.815, -1122.815] - loss: 12392.048 - mae: 1432.999 - mean_q: 1827.379 Interval 4079 (2039000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 
-3.9108 4 episodes - episode_reward: -491.709 [-925.948, -191.179] - loss: 12198.304 - mae: 1422.391 - mean_q: 1813.307 Interval 4080 (2039500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7465 2 episodes - episode_reward: -707.255 [-1144.816, -269.693] - loss: 10825.968 - mae: 1413.506 - mean_q: 1799.664 Interval 4081 (2040000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2209 Interval 4082 (2040500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5683 1 episodes - episode_reward: -1180.238 [-1180.238, -1180.238] - loss: 11481.384 - mae: 1474.955 - mean_q: 1888.289 Interval 4083 (2041000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.9683 5 episodes - episode_reward: -526.962 [-993.627, -271.658] - loss: 13119.240 - mae: 1497.938 - mean_q: 1923.835 Interval 4084 (2041500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1244 Interval 4085 (2042000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1887 Interval 4086 (2042500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3796 1 episodes - episode_reward: -1322.417 [-1322.417, -1322.417] - loss: 12955.641 - mae: 1534.689 - mean_q: 1974.107 Interval 4087 (2043000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9729 2 episodes - episode_reward: -266.035 [-275.470, -256.600] - loss: 12692.525 - mae: 1529.195 - mean_q: 1967.479 Interval 4088 (2043500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1041 1 episodes - episode_reward: -523.437 [-523.437, -523.437] - loss: 13309.859 - mae: 1544.724 - mean_q: 1989.522 Interval 4089 (2044000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2681 Interval 4090 (2044500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -4.3034 4 episodes - episode_reward: -583.966 [-952.234, -255.580] - loss: 14462.046 - mae: 1546.442 - mean_q: 1994.096 Interval 4091 (2045000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1686 Interval 4092 (2045500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.9337 2 episodes - episode_reward: -1012.457 [-1120.554, -904.360] - loss: 14362.631 - mae: 1563.528 - mean_q: 2019.314 Interval 4093 (2046000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.9164 1 episodes - episode_reward: -926.331 [-926.331, -926.331] - loss: 15043.791 - mae: 1594.574 - mean_q: 2065.534 Interval 4094 (2046500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4825 1 episodes - episode_reward: -1205.899 [-1205.899, -1205.899] - loss: 14381.594 - mae: 1640.272 - mean_q: 2128.376 Interval 4095 (2047000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1511 Interval 4096 (2047500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1674 Interval 4097 (2048000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1592 Interval 4098 (2048500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.6321 4 episodes - episode_reward: -510.170 [-1206.991, -112.393] - loss: 12028.412 - mae: 1643.188 - mean_q: 2141.570 Interval 4099 (2049000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9027 1 episodes - episode_reward: -497.307 [-497.307, -497.307] - loss: 12888.162 - mae: 1670.981 - mean_q: 2177.775 Interval 4100 (2049500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2343 Interval 4101 (2050000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1631 Interval 4102 
(2050500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1965 Interval 4103 (2051000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2491 Interval 4104 (2051500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1502 Interval 4105 (2052000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.1813 Interval 4106 (2052500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1857 Interval 4107 (2053000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.1910 Interval 4108 (2053500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1924 Interval 4109 (2054000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1827 Interval 4110 (2054500 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.2162 Interval 4111 (2055000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1687 Interval 4112 (2055500 steps performed) 500/500 [==============================] - 9s 17ms/step - reward: -0.2162 Interval 4113 (2056000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1853 Interval 4114 (2056500 steps performed) 500/500 [==============================] - 9s 17ms/step - reward: -0.1728 Interval 4115 (2057000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.2513 Interval 4116 (2057500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.3714 4 episodes - episode_reward: -939.753 [-2218.062, -451.963] - loss: 6836.525 - mae: 1843.345 - mean_q: 2435.805 Interval 4117 (2058000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4897 1 episodes - episode_reward: -618.029 [-618.029, -618.029] - loss: 6864.647 - 
mae: 1852.321 - mean_q: 2447.241 Interval 4118 (2058500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1684 Interval 4119 (2059000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1778 Interval 4120 (2059500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1650 Interval 4121 (2060000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2287 Interval 4122 (2060500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1643 Interval 4123 (2061000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1981 Interval 4124 (2061500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.0929 Interval 4125 (2062000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.1968 Interval 4126 (2062500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1699 Interval 4127 (2063000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.4375 1 episodes - episode_reward: -1175.085 [-1175.085, -1175.085] - loss: 6121.325 - mae: 2065.097 - mean_q: 2737.272 Interval 4128 (2063500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -6.0977 6 episodes - episode_reward: -506.829 [-707.499, -345.627] - loss: 6531.851 - mae: 2050.799 - mean_q: 2714.793 Interval 4129 (2064000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.2062 6 episodes - episode_reward: -352.022 [-667.040, -100.000] - loss: 6221.712 - mae: 2099.270 - mean_q: 2786.267 Interval 4130 (2064500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0586 7 episodes - episode_reward: -414.433 [-853.991, -280.523] - loss: 6499.361 - mae: 2088.410 - mean_q: 2767.769 Interval 4131 (2065000 steps performed) 
500/500 [==============================] - 2s 5ms/step - reward: -4.4601 7 episodes - episode_reward: -315.199 [-378.832, -250.284] - loss: 6780.725 - mae: 2115.756 - mean_q: 2805.396 Interval 4132 (2065500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7178 8 episodes - episode_reward: -373.338 [-471.817, -289.737] - loss: 5776.038 - mae: 2105.186 - mean_q: 2788.262 Interval 4133 (2066000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.9859 9 episodes - episode_reward: -390.265 [-833.827, -100.000] - loss: 6932.746 - mae: 2068.248 - mean_q: 2736.924 Interval 4134 (2066500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.1519 7 episodes - episode_reward: -366.170 [-483.421, -264.984] - loss: 6325.226 - mae: 2063.645 - mean_q: 2731.626 Interval 4135 (2067000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.0468 8 episodes - episode_reward: -443.110 [-955.133, -100.000] - loss: 6395.270 - mae: 2028.438 - mean_q: 2681.500 Interval 4136 (2067500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.5850 7 episodes - episode_reward: -468.293 [-872.657, -222.306] - loss: 7000.924 - mae: 2037.779 - mean_q: 2693.746 Interval 4137 (2068000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.3371 8 episodes - episode_reward: -326.064 [-384.342, -235.675] - loss: 6083.229 - mae: 2010.713 - mean_q: 2657.860 Interval 4138 (2068500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.4359 7 episodes - episode_reward: -422.192 [-888.942, -311.823] - loss: 5873.852 - mae: 1970.375 - mean_q: 2602.137 Interval 4139 (2069000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.6998 7 episodes - episode_reward: -524.911 [-828.239, -337.300] - loss: 6314.938 - mae: 1919.801 - mean_q: 2529.764 Interval 4140 (2069500 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.2736 7 episodes - episode_reward: -414.913 [-541.676, -288.943] - loss: 6363.081 - mae: 1925.686 - mean_q: 2539.013 Interval 4141 (2070000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -7.3056 9 episodes - episode_reward: -434.061 [-826.923, -197.028] - loss: 6288.957 - mae: 1909.772 - mean_q: 2516.004 Interval 4142 (2070500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -7.0704 8 episodes - episode_reward: -397.399 [-859.373, -100.000] - loss: 6494.121 - mae: 1865.761 - mean_q: 2454.692 Interval 4143 (2071000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.3076 9 episodes - episode_reward: -473.452 [-619.985, -344.451] - loss: 7281.965 - mae: 1828.203 - mean_q: 2402.091 Interval 4144 (2071500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.1336 7 episodes - episode_reward: -532.288 [-989.497, -351.482] - loss: 6563.480 - mae: 1777.288 - mean_q: 2331.920 Interval 4145 (2072000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.0787 7 episodes - episode_reward: -509.276 [-879.843, -305.668] - loss: 7173.495 - mae: 1736.106 - mean_q: 2273.999 Interval 4146 (2072500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.3037 9 episodes - episode_reward: -328.399 [-497.443, -100.000] - loss: 6850.530 - mae: 1692.898 - mean_q: 2215.919 Interval 4147 (2073000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.7226 9 episodes - episode_reward: -455.333 [-917.165, -276.296] - loss: 6637.555 - mae: 1700.681 - mean_q: 2228.190 Interval 4148 (2073500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.0555 7 episodes - episode_reward: -500.665 [-1037.962, -250.320] - loss: 6498.752 - mae: 1644.240 - mean_q: 2148.997 
Interval 4149 (2074000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.5531 7 episodes - episode_reward: -458.881 [-597.505, -328.565] - loss: 6809.553 - mae: 1634.617 - mean_q: 2134.358 Interval 4150 (2074500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.3998 9 episodes - episode_reward: -474.348 [-823.240, -271.375] - loss: 6453.875 - mae: 1614.720 - mean_q: 2107.508 Interval 4151 (2075000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9365 7 episodes - episode_reward: -356.816 [-639.441, -170.085] - loss: 7258.020 - mae: 1588.847 - mean_q: 2070.104 Interval 4152 (2075500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.1340 5 episodes - episode_reward: -407.332 [-443.743, -344.833] - loss: 6878.517 - mae: 1562.244 - mean_q: 2036.295 Interval 4153 (2076000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8691 5 episodes - episode_reward: -391.669 [-617.539, -196.597] - loss: 6504.125 - mae: 1533.574 - mean_q: 1997.766 Interval 4154 (2076500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5984 5 episodes - episode_reward: -560.073 [-1122.583, -327.071] - loss: 6456.964 - mae: 1515.409 - mean_q: 1973.088 Interval 4155 (2077000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1822 Interval 4156 (2077500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2387 Interval 4157 (2078000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1498 Interval 4158 (2078500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2066 Interval 4159 (2079000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1687 Interval 4160 (2079500 steps performed) 500/500 [==============================] - 4s 
8ms/step - reward: -0.2771 Interval 4161 (2080000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2778 1 episodes - episode_reward: -1196.526 [-1196.526, -1196.526] - loss: 5443.175 - mae: 1341.579 - mean_q: 1741.706 Interval 4162 (2080500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3799 Interval 4163 (2081000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0364 Interval 4164 (2081500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4416 Interval 4165 (2082000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2010 Interval 4166 (2082500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8249 2 episodes - episode_reward: -671.810 [-1090.222, -253.398] - loss: 5091.666 - mae: 1218.925 - mean_q: 1581.092 Interval 4167 (2083000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1280 Interval 4168 (2083500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0224 Interval 4169 (2084000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1736 Interval 4170 (2084500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3736 Interval 4171 (2085000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7536 2 episodes - episode_reward: -633.362 [-1063.398, -203.326] - loss: 4637.673 - mae: 1121.454 - mean_q: 1450.361 Interval 4172 (2085500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1370 Interval 4173 (2086000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8277 1 episodes - episode_reward: -727.792 [-727.792, -727.792] - loss: 4734.369 - mae: 1092.161 - mean_q: 1409.512 Interval 4174 (2086500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.6221 3 episodes - episode_reward: -504.993 [-581.539, -420.257] - loss: 4582.716 - mae: 1094.875 - mean_q: 1416.229 Interval 4175 (2087000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1061 Interval 4176 (2087500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7498 2 episodes - episode_reward: -528.979 [-845.967, -211.992] - loss: 4142.353 - mae: 1048.048 - mean_q: 1351.332 Interval 4177 (2088000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3664 4 episodes - episode_reward: -494.809 [-700.810, -370.787] - loss: 3833.951 - mae: 1016.070 - mean_q: 1307.000 Interval 4178 (2088500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3740 2 episodes - episode_reward: -408.683 [-435.999, -381.367] - loss: 3751.865 - mae: 1014.747 - mean_q: 1305.517 Interval 4179 (2089000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1546 Interval 4180 (2089500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2261 Interval 4181 (2090000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0522 Interval 4182 (2090500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -2.1449 1 episodes - episode_reward: -1023.738 [-1023.738, -1023.738] - loss: 3445.761 - mae: 969.409 - mean_q: 1244.430 Interval 4183 (2091000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0053 2 episodes - episode_reward: -635.789 [-768.474, -503.104] - loss: 3489.379 - mae: 953.129 - mean_q: 1222.980 Interval 4184 (2091500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5453 2 episodes - episode_reward: -360.984 [-614.953, -107.016] - loss: 3605.047 - mae: 937.619 - mean_q: 1202.674 Interval 4185 (2092000 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1845 2 episodes - episode_reward: -347.844 [-368.147, -327.541] - loss: 2951.938 - mae: 913.079 - mean_q: 1169.185 Interval 4186 (2092500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1982 Interval 4187 (2093000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2219 Interval 4188 (2093500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0610 1 episodes - episode_reward: -725.981 [-725.981, -725.981] - loss: 2701.955 - mae: 870.038 - mean_q: 1109.854 Interval 4189 (2094000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5968 4 episodes - episode_reward: -316.580 [-433.050, -100.000] - loss: 2745.427 - mae: 854.616 - mean_q: 1089.131 Interval 4190 (2094500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6756 3 episodes - episode_reward: -376.506 [-492.489, -318.505] - loss: 2733.902 - mae: 849.032 - mean_q: 1083.708 Interval 4191 (2095000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7056 6 episodes - episode_reward: -344.118 [-634.656, -196.962] - loss: 2590.098 - mae: 824.500 - mean_q: 1050.628 Interval 4192 (2095500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7627 1 episodes - episode_reward: -247.292 [-247.292, -247.292] - loss: 2339.843 - mae: 811.254 - mean_q: 1033.129 Interval 4193 (2096000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7058 8 episodes - episode_reward: -248.047 [-486.146, -100.000] - loss: 2361.409 - mae: 791.161 - mean_q: 1007.307 Interval 4194 (2096500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.1232 5 episodes - episode_reward: -419.137 [-546.999, -147.936] - loss: 2368.309 - mae: 774.245 - mean_q: 984.592 Interval 4195 
(2097000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4548 3 episodes - episode_reward: -362.763 [-529.100, -155.405] - loss: 2520.394 - mae: 767.239 - mean_q: 975.078 Interval 4196 (2097500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8615 4 episodes - episode_reward: -241.314 [-348.469, -135.891] - loss: 2437.237 - mae: 747.495 - mean_q: 948.234 Interval 4197 (2098000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0734 Interval 4198 (2098500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8294 2 episodes - episode_reward: -907.862 [-1285.937, -529.787] - loss: 2297.722 - mae: 737.141 - mean_q: 935.309 Interval 4199 (2099000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3171 2 episodes - episode_reward: -918.969 [-1499.367, -338.571] - loss: 2395.725 - mae: 723.235 - mean_q: 914.839 Interval 4200 (2099500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7997 3 episodes - episode_reward: -624.046 [-803.961, -493.139] - loss: 2320.937 - mae: 707.529 - mean_q: 891.938 Interval 4201 (2100000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3444 5 episodes - episode_reward: -451.398 [-551.992, -346.813] - loss: 2467.504 - mae: 697.694 - mean_q: 879.856 Interval 4202 (2100500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9749 1 episodes - episode_reward: -1391.764 [-1391.764, -1391.764] - loss: 2310.246 - mae: 676.868 - mean_q: 849.530 Interval 4203 (2101000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.2972 4 episodes - episode_reward: -540.505 [-761.835, -422.602] - loss: 2130.670 - mae: 663.698 - mean_q: 831.479 Interval 4204 (2101500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5209 4 
episodes - episode_reward: -428.297 [-697.052, -110.787] - loss: 2307.412 - mae: 657.187 - mean_q: 823.348 Interval 4205 (2102000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7088 4 episodes - episode_reward: -467.789 [-553.779, -309.734] - loss: 2349.882 - mae: 640.697 - mean_q: 800.507 Interval 4206 (2102500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3947 3 episodes - episode_reward: -595.111 [-918.670, -343.535] - loss: 2485.640 - mae: 625.195 - mean_q: 779.509 Interval 4207 (2103000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.1304 3 episodes - episode_reward: -525.856 [-697.292, -303.183] - loss: 2480.909 - mae: 617.672 - mean_q: 767.838 Interval 4208 (2103500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0352 6 episodes - episode_reward: -582.521 [-1274.517, -297.159] - loss: 2273.349 - mae: 602.813 - mean_q: 745.598 Interval 4209 (2104000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4711 5 episodes - episode_reward: -456.710 [-692.210, -130.596] - loss: 2272.963 - mae: 591.319 - mean_q: 729.703 Interval 4210 (2104500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3011 3 episodes - episode_reward: -551.982 [-1362.413, -100.000] - loss: 2543.483 - mae: 582.511 - mean_q: 716.307 Interval 4211 (2105000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5328 3 episodes - episode_reward: -609.879 [-733.397, -382.963] - loss: 2346.377 - mae: 566.577 - mean_q: 692.963 Interval 4212 (2105500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8938 2 episodes - episode_reward: -631.936 [-676.232, -587.640] - loss: 2335.510 - mae: 557.968 - mean_q: 680.470 Interval 4213 (2106000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1546 1 
episodes - episode_reward: -836.827 [-836.827, -836.827] - loss: 2208.524 - mae: 551.910 - mean_q: 675.065 Interval 4214 (2106500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6135 7 episodes - episode_reward: -504.850 [-1477.177, -107.887] - loss: 2090.020 - mae: 538.167 - mean_q: 652.233 Interval 4215 (2107000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5704 5 episodes - episode_reward: -469.560 [-898.677, -238.215] - loss: 2060.348 - mae: 537.680 - mean_q: 650.869 Interval 4216 (2107500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7013 5 episodes - episode_reward: -459.134 [-626.133, -100.000] - loss: 2067.704 - mae: 527.587 - mean_q: 636.077 Interval 4217 (2108000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2830 3 episodes - episode_reward: -903.098 [-1369.607, -406.276] - loss: 2213.691 - mae: 528.823 - mean_q: 638.255 Interval 4218 (2108500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5586 5 episodes - episode_reward: -433.431 [-743.042, -138.558] - loss: 1952.390 - mae: 513.657 - mean_q: 614.760 Interval 4219 (2109000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.6251 4 episodes - episode_reward: -551.862 [-697.006, -391.238] - loss: 2011.255 - mae: 515.266 - mean_q: 617.821 Interval 4220 (2109500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0458 3 episodes - episode_reward: -712.592 [-858.686, -571.959] - loss: 2140.347 - mae: 518.939 - mean_q: 620.819 Interval 4221 (2110000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8270 4 episodes - episode_reward: -703.349 [-914.147, -507.563] - loss: 1998.845 - mae: 517.092 - mean_q: 616.692 Interval 4222 (2110500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1132 4 
episodes - episode_reward: -437.129 [-541.225, -201.432] - loss: 2165.002 - mae: 519.666 - mean_q: 619.615 Interval 4223 (2111000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.1706 3 episodes - episode_reward: -548.684 [-756.663, -162.119] - loss: 2082.729 - mae: 519.157 - mean_q: 616.993 Interval 4224 (2111500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8912 4 episodes - episode_reward: -598.540 [-700.443, -448.245] - loss: 2269.414 - mae: 529.233 - mean_q: 630.253 Interval 4225 (2112000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5669 1 episodes - episode_reward: -789.036 [-789.036, -789.036] - loss: 2391.178 - mae: 535.841 - mean_q: 637.703 Interval 4226 (2112500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1532 Interval 4227 (2113000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2204 Interval 4228 (2113500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.3227 4 episodes - episode_reward: -333.384 [-775.006, -106.763] - loss: 2395.644 - mae: 576.634 - mean_q: 694.211 Interval 4229 (2114000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0299 Interval 4230 (2114500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1808 Interval 4231 (2115000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1988 Interval 4232 (2115500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1440 Interval 4233 (2116000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2024 Interval 4234 (2116500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1544 Interval 4235 (2117000 steps performed) 500/500 [==============================] - 5s 10ms/step - 
reward: -0.1821 Interval 4236 (2117500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1855 Interval 4237 (2118000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1974 Interval 4238 (2118500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.2006 Interval 4239 (2119000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1794 Interval 4240 (2119500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1807 Interval 4241 (2120000 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.2241 Interval 4242 (2120500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1321 Interval 4243 (2121000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.2226 Interval 4244 (2121500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.3158 Interval 4245 (2122000 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.0096 Interval 4246 (2122500 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1909 Interval 4247 (2123000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1962 Interval 4248 (2123500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -1.2446 1 episodes - episode_reward: -2122.370 [-2122.370, -2122.370] - loss: 3160.262 - mae: 924.900 - mean_q: 1190.770 Interval 4249 (2124000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1804 5 episodes - episode_reward: -555.608 [-687.858, -434.188] - loss: 3360.103 - mae: 951.649 - mean_q: 1227.507 Interval 4250 (2124500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3394 5 episodes - episode_reward: -428.991 [-687.718, 
-128.552] - loss: 3389.406 - mae: 978.210 - mean_q: 1262.083 Interval 4251 (2125000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7976: 0s - reward: -4. 7 episodes - episode_reward: -317.554 [-576.771, -135.730] - loss: 3346.706 - mae: 1003.644 - mean_q: 1296.764 Interval 4252 (2125500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8519 8 episodes - episode_reward: -394.401 [-720.343, -142.986] - loss: 3775.633 - mae: 1044.528 - mean_q: 1350.246 Interval 4253 (2126000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6415 7 episodes - episode_reward: -245.460 [-483.603, -100.000] - loss: 3964.028 - mae: 1096.043 - mean_q: 1415.425 Interval 4254 (2126500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9046 7 episodes - episode_reward: -361.104 [-616.649, -133.956] - loss: 4016.866 - mae: 1123.659 - mean_q: 1449.880 Interval 4255 (2127000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3719 5 episodes - episode_reward: -439.082 [-584.273, -109.490] - loss: 4453.620 - mae: 1157.594 - mean_q: 1494.469 Interval 4256 (2127500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6980 8 episodes - episode_reward: -290.379 [-485.194, -129.089] - loss: 4708.749 - mae: 1197.060 - mean_q: 1547.124 Interval 4257 (2128000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0227 5 episodes - episode_reward: -468.497 [-610.659, -240.861] - loss: 4815.277 - mae: 1255.647 - mean_q: 1624.551 Interval 4258 (2128500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7233 5 episodes - episode_reward: -499.059 [-624.914, -363.276] - loss: 5473.475 - mae: 1297.301 - mean_q: 1677.892 Interval 4259 (2129000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0672 5 episodes - 
episode_reward: -425.946 [-669.674, -165.515] - loss: 5732.756 - mae: 1350.538 - mean_q: 1747.969 Interval 4260 (2129500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.2807 8 episodes - episode_reward: -323.405 [-761.197, -83.755] - loss: 6426.400 - mae: 1417.459 - mean_q: 1837.831 Interval 4261 (2130000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4837 4 episodes - episode_reward: -561.530 [-672.662, -460.831] - loss: 7399.915 - mae: 1460.073 - mean_q: 1892.667 Interval 4262 (2130500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4692 5 episodes - episode_reward: -403.378 [-509.433, -250.160] - loss: 7962.898 - mae: 1490.583 - mean_q: 1928.142 Interval 4263 (2131000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.0112 6 episodes - episode_reward: -360.183 [-682.741, -100.000] - loss: 8655.865 - mae: 1542.504 - mean_q: 1999.077 Interval 4264 (2131500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1662 4 episodes - episode_reward: -566.041 [-740.113, -493.399] - loss: 9048.603 - mae: 1592.557 - mean_q: 2061.301 Interval 4265 (2132000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5856 4 episodes - episode_reward: -507.532 [-634.434, -442.135] - loss: 9887.244 - mae: 1604.307 - mean_q: 2073.093 Interval 4266 (2132500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2303 5 episodes - episode_reward: -345.323 [-442.401, -232.897] - loss: 10015.392 - mae: 1584.554 - mean_q: 2044.060 Interval 4267 (2133000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8161 5 episodes - episode_reward: -489.042 [-522.639, -414.141] - loss: 9590.944 - mae: 1585.843 - mean_q: 2047.103 Interval 4268 (2133500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: 
-4.2785 6 episodes - episode_reward: -346.784 [-527.777, -110.292] - loss: 9455.357 - mae: 1546.932 - mean_q: 1991.968 Interval 4269 (2134000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7236 4 episodes - episode_reward: -440.010 [-515.098, -364.849] - loss: 9322.686 - mae: 1514.140 - mean_q: 1946.294 Interval 4270 (2134500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8442 6 episodes - episode_reward: -517.674 [-1100.179, -255.289] - loss: 10825.193 - mae: 1515.612 - mean_q: 1945.851 Interval 4271 (2135000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3384 1 episodes - episode_reward: -602.847 [-602.847, -602.847] - loss: 8948.345 - mae: 1485.846 - mean_q: 1903.902 Interval 4272 (2135500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2008 Interval 4273 (2136000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2779 Interval 4274 (2136500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1827 Interval 4275 (2137000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8030 3 episodes - episode_reward: -354.278 [-483.954, -253.977] - loss: 8396.997 - mae: 1342.964 - mean_q: 1711.522 Interval 4276 (2137500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.2787 4 episodes - episode_reward: -564.254 [-946.591, -341.324] - loss: 7855.311 - mae: 1283.699 - mean_q: 1629.599 Interval 4277 (2138000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0682 7 episodes - episode_reward: -366.621 [-585.609, -182.649] - loss: 7792.976 - mae: 1271.629 - mean_q: 1616.109 Interval 4278 (2138500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9990 2 episodes - episode_reward: -576.331 [-658.532, -494.130] - loss: 7407.277 - mae: 
1202.630 - mean_q: 1522.737 Interval 4279 (2139000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1707 8 episodes - episode_reward: -313.104 [-577.243, -100.000] - loss: 7071.099 - mae: 1156.247 - mean_q: 1461.614 Interval 4280 (2139500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7048 7 episodes - episode_reward: -460.183 [-648.006, -100.000] - loss: 6589.060 - mae: 1090.432 - mean_q: 1369.040 Interval 4281 (2140000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8603 8 episodes - episode_reward: -380.011 [-560.396, -188.109] - loss: 5654.911 - mae: 1029.357 - mean_q: 1286.000 Interval 4282 (2140500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9927 1 episodes - episode_reward: -463.005 [-463.005, -463.005] - loss: 5768.265 - mae: 989.943 - mean_q: 1231.164 Interval 4283 (2141000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4694 4 episodes - episode_reward: -563.369 [-664.168, -394.546] - loss: 5989.671 - mae: 976.996 - mean_q: 1215.164 Interval 4284 (2141500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5536 2 episodes - episode_reward: -804.122 [-829.691, -778.554] - loss: 5458.342 - mae: 934.697 - mean_q: 1159.417 Interval 4285 (2142000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4466 9 episodes - episode_reward: -319.107 [-663.087, -100.207] - loss: 5133.224 - mae: 899.024 - mean_q: 1110.886 Interval 4286 (2142500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1950 4 episodes - episode_reward: -581.923 [-659.893, -480.376] - loss: 4644.164 - mae: 840.990 - mean_q: 1030.988 Interval 4287 (2143000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8015 6 episodes - episode_reward: -525.591 [-668.533, -419.155] - loss: 
4573.181 - mae: 804.313 - mean_q: 981.997 Interval 4288 (2143500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6293 6 episodes - episode_reward: -392.109 [-683.200, -104.701] - loss: 4374.266 - mae: 755.656 - mean_q: 915.913 Interval 4289 (2144000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7499 2 episodes - episode_reward: -1407.662 [-2208.804, -606.520] - loss: 4203.480 - mae: 732.302 - mean_q: 884.096 Interval 4290 (2144500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6859 5 episodes - episode_reward: -439.659 [-555.136, -320.226] - loss: 3876.593 - mae: 716.834 - mean_q: 862.199 Interval 4291 (2145000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0670 4 episodes - episode_reward: -658.486 [-1624.299, -227.918] - loss: 3721.690 - mae: 685.938 - mean_q: 820.851 Interval 4292 (2145500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0276 6 episodes - episode_reward: -415.419 [-699.380, -132.633] - loss: 3516.887 - mae: 663.024 - mean_q: 790.226 Interval 4293 (2146000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8731 7 episodes - episode_reward: -327.539 [-611.070, -127.560] - loss: 3858.202 - mae: 652.318 - mean_q: 774.409 Interval 4294 (2146500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6552 5 episodes - episode_reward: -536.177 [-707.354, -199.554] - loss: 3095.281 - mae: 636.534 - mean_q: 755.663 Interval 4295 (2147000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9869 7 episodes - episode_reward: -473.080 [-721.282, -168.622] - loss: 3434.425 - mae: 617.338 - mean_q: 728.938 Interval 4296 (2147500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5250 6 episodes - episode_reward: -483.748 [-691.269, -270.057] - 
loss: 3156.752 - mae: 605.518 - mean_q: 712.981 Interval 4297 (2148000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0435 5 episodes - episode_reward: -464.452 [-574.627, -303.341] - loss: 3166.423 - mae: 602.914 - mean_q: 710.329 Interval 4298 (2148500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.2132 6 episodes - episode_reward: -520.924 [-714.192, -133.368] - loss: 3217.639 - mae: 574.778 - mean_q: 668.547 Interval 4299 (2149000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9713 3 episodes - episode_reward: -716.316 [-1161.548, -164.305] - loss: 2967.211 - mae: 569.733 - mean_q: 661.900 Interval 4300 (2149500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6027 9 episodes - episode_reward: -364.621 [-669.791, -100.000] - loss: 2958.669 - mae: 555.246 - mean_q: 642.784 Interval 4301 (2150000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5091 5 episodes - episode_reward: -555.988 [-1087.044, -200.792] - loss: 2796.078 - mae: 538.244 - mean_q: 617.733 Interval 4302 (2150500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.5071 2 episodes - episode_reward: -1363.661 [-2260.862, -466.461] - loss: 2839.609 - mae: 534.782 - mean_q: 612.348 Interval 4303 (2151000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6790 5 episodes - episode_reward: -380.940 [-586.391, -166.155] - loss: 2868.708 - mae: 521.039 - mean_q: 593.589 Interval 4304 (2151500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2753 6 episodes - episode_reward: -513.201 [-735.994, -245.976] - loss: 2868.616 - mae: 517.068 - mean_q: 588.174 Interval 4305 (2152000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4007 2 episodes - episode_reward: -1078.937 [-1740.642, 
-417.231] - loss: 2288.853 - mae: 494.793 - mean_q: 556.727 Interval 4306 (2152500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.7219 3 episodes - episode_reward: -1196.889 [-2453.689, -441.674] - loss: 2359.585 - mae: 502.859 - mean_q: 567.286 Interval 4307 (2153000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1255 7 episodes - episode_reward: -379.140 [-633.137, -117.345] - loss: 2251.115 - mae: 478.364 - mean_q: 531.829 Interval 4308 (2153500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6066 6 episodes - episode_reward: -311.155 [-541.937, -152.799] - loss: 2027.495 - mae: 467.265 - mean_q: 516.085 Interval 4309 (2154000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3144 3 episodes - episode_reward: -557.688 [-764.660, -454.051] - loss: 2257.194 - mae: 469.455 - mean_q: 518.245 Interval 4310 (2154500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6051 6 episodes - episode_reward: -471.270 [-699.958, -142.632] - loss: 2006.065 - mae: 462.743 - mean_q: 509.234 Interval 4311 (2155000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2773 4 episodes - episode_reward: -582.341 [-1253.234, -126.677] - loss: 1966.444 - mae: 453.039 - mean_q: 494.735 Interval 4312 (2155500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4596 6 episodes - episode_reward: -418.163 [-633.691, -165.426] - loss: 1952.102 - mae: 456.006 - mean_q: 497.969 Interval 4313 (2156000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6987 6 episodes - episode_reward: -488.680 [-675.392, -357.683] - loss: 2058.991 - mae: 458.580 - mean_q: 500.175 Interval 4314 (2156500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8927 4 episodes - episode_reward: -634.405 
[-1049.340, -380.220] - loss: 1941.543 - mae: 469.621 - mean_q: 516.728 Interval 4315 (2157000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1464 3 episodes - episode_reward: -826.411 [-1505.948, -471.040] - loss: 2325.902 - mae: 497.696 - mean_q: 555.233 Interval 4316 (2157500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.8330 8 episodes - episode_reward: -358.660 [-533.138, -101.886] - loss: 2468.080 - mae: 523.500 - mean_q: 589.419 Interval 4317 (2158000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.4409 8 episodes - episode_reward: -411.013 [-514.480, -125.202] - loss: 2391.571 - mae: 544.368 - mean_q: 618.042 Interval 4318 (2158500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.0852 9 episodes - episode_reward: -455.328 [-835.191, -110.181] - loss: 2325.281 - mae: 570.889 - mean_q: 654.879 Interval 4319 (2159000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.1174 8 episodes - episode_reward: -503.653 [-703.832, -185.607] - loss: 2650.495 - mae: 607.587 - mean_q: 703.100 Interval 4320 (2159500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.6016 9 episodes - episode_reward: -476.915 [-751.402, -266.114] - loss: 2608.558 - mae: 628.103 - mean_q: 730.503 Interval 4321 (2160000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.8450 9 episodes - episode_reward: -415.695 [-708.353, -137.584] - loss: 3064.465 - mae: 649.802 - mean_q: 759.170 Interval 4322 (2160500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.6924 7 episodes - episode_reward: -605.772 [-1197.295, -380.735] - loss: 3234.784 - mae: 668.130 - mean_q: 782.856 Interval 4323 (2161000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.5964 10 episodes - episode_reward: 
-390.136 [-705.392, -100.000] - loss: 3499.445 - mae: 674.942 - mean_q: 791.257 Interval 4324 (2161500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.9089 8 episodes - episode_reward: -458.744 [-707.364, -130.192] - loss: 3421.740 - mae: 669.060 - mean_q: 782.370 Interval 4325 (2162000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.8768 7 episodes - episode_reward: -452.514 [-664.192, -235.139] - loss: 3171.945 - mae: 636.329 - mean_q: 735.074 Interval 4326 (2162500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.3783 8 episodes - episode_reward: -524.908 [-791.864, -311.929] - loss: 3116.797 - mae: 608.260 - mean_q: 696.166 Interval 4327 (2163000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4624 5 episodes - episode_reward: -484.354 [-588.164, -393.266] - loss: 2878.146 - mae: 567.963 - mean_q: 640.924 Interval 4328 (2163500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.4728 7 episodes - episode_reward: -542.949 [-938.812, -192.774] - loss: 2463.506 - mae: 546.908 - mean_q: 612.321 Interval 4329 (2164000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3928 5 episodes - episode_reward: -533.361 [-646.372, -350.689] - loss: 2088.155 - mae: 509.792 - mean_q: 560.276 Interval 4330 (2164500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.0010 7 episodes - episode_reward: -502.053 [-751.483, -121.225] - loss: 2254.987 - mae: 482.558 - mean_q: 523.186 Interval 4331 (2165000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5988 4 episodes - episode_reward: -451.481 [-521.114, -281.247] - loss: 2212.316 - mae: 455.406 - mean_q: 486.104 Interval 4332 (2165500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.7271 7 episodes - episode_reward: 
-535.768 [-819.804, -297.901] - loss: 2238.192 - mae: 424.002 - mean_q: 442.269 Interval 4333 (2166000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.1296 8 episodes - episode_reward: -518.471 [-979.045, -147.741] - loss: 2116.950 - mae: 407.452 - mean_q: 418.144 Interval 4334 (2166500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0989 5 episodes - episode_reward: -502.824 [-794.186, -236.598] - loss: 1847.923 - mae: 386.213 - mean_q: 389.272 Interval 4335 (2167000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0773 Interval 4336 (2167500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.8928 4 episodes - episode_reward: -605.595 [-871.352, -467.629] - loss: 2068.737 - mae: 360.302 - mean_q: 351.581 Interval 4337 (2168000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.5881 9 episodes - episode_reward: -409.844 [-668.144, -122.910] - loss: 1625.751 - mae: 341.915 - mean_q: 325.943 Interval 4338 (2168500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.3079 9 episodes - episode_reward: -489.202 [-815.813, -100.000] - loss: 1984.551 - mae: 335.985 - mean_q: 316.385 Interval 4339 (2169000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4843 5 episodes - episode_reward: -526.750 [-874.400, -356.671] - loss: 1805.949 - mae: 317.862 - mean_q: 290.090 Interval 4340 (2169500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -9.4529 9 episodes - episode_reward: -488.484 [-1058.104, -97.503] - loss: 1783.122 - mae: 303.359 - mean_q: 268.110 Interval 4341 (2170000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.9126 8 episodes - episode_reward: -529.617 [-1101.252, -175.660] - loss: 1791.228 - mae: 290.379 - mean_q: 249.655 Interval 4342 (2170500 steps 
performed) 500/500 [==============================] - 2s 5ms/step - reward: -9.0287 6 episodes - episode_reward: -766.018 [-1580.627, -348.724] - loss: 1843.446 - mae: 276.704 - mean_q: 228.629 Interval 4343 (2171000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.4725 7 episodes - episode_reward: -468.980 [-655.982, -263.991] - loss: 1432.716 - mae: 251.046 - mean_q: 191.729 Interval 4344 (2171500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1477 Interval 4345 (2172000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1927 Interval 4346 (2172500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1943 Interval 4347 (2173000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1492 Interval 4348 (2173500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1754 Interval 4349 (2174000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1710 Interval 4350 (2174500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1630 Interval 4351 (2175000 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1594 Interval 4352 (2175500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1771 Interval 4353 (2176000 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.2244 Interval 4354 (2176500 steps performed) 500/500 [==============================] - 23s 46ms/step - reward: -0.1689 Interval 4355 (2177000 steps performed) 500/500 [==============================] - 104s 208ms/step - reward: -0.1623 Interval 4356 (2177500 steps performed) 500/500 [==============================] - 9s 19ms/step - reward: -0.1594 Interval 4357 (2178000 steps performed) 500/500 [==============================] - 10s 20ms/step - 
reward: -0.1949 Interval 4358 (2178500 steps performed) 500/500 [==============================] - 11s 22ms/step - reward: -0.1463 Interval 4359 (2179000 steps performed) 500/500 [==============================] - 12s 24ms/step - reward: -0.2209 Interval 4360 (2179500 steps performed) 500/500 [==============================] - 13s 25ms/step - reward: -0.5475 Interval 4361 (2180000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -6.2885 3 episodes - episode_reward: -1467.410 [-3488.689, -388.306] - loss: 1500.755 - mae: 410.306 - mean_q: 433.805 Interval 4362 (2180500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.7293 5 episodes - episode_reward: -731.663 [-1738.972, -279.160] - loss: 1261.251 - mae: 398.917 - mean_q: 419.233 Interval 4363 (2181000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.4242 5 episodes - episode_reward: -1060.149 [-1639.694, -377.994] - loss: 1471.864 - mae: 402.342 - mean_q: 422.131 Interval 4364 (2181500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.3373 8 episodes - episode_reward: -525.398 [-984.199, -139.416] - loss: 1691.728 - mae: 402.313 - mean_q: 422.435 Interval 4365 (2182000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.0589 4 episodes - episode_reward: -767.230 [-1555.292, -237.582] - loss: 1469.902 - mae: 411.220 - mean_q: 436.201 Interval 4366 (2182500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.0447 6 episodes - episode_reward: -643.968 [-1857.074, -100.000] - loss: 1716.287 - mae: 412.132 - mean_q: 438.087 Interval 4367 (2183000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6500 5 episodes - episode_reward: -557.274 [-828.659, -211.515] - loss: 1665.975 - mae: 408.336 - mean_q: 432.076 Interval 4368 (2183500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -5.6690 8 episodes - episode_reward: -375.164 [-1316.233, -152.934] - loss: 1598.110 - mae: 407.556 - mean_q: 431.483 Interval 4369 (2184000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.4167 10 episodes - episode_reward: -423.023 [-743.066, -100.000] - loss: 1595.801 - mae: 424.341 - mean_q: 456.208 Interval 4370 (2184500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.0659 5 episodes - episode_reward: -711.900 [-1441.764, -265.416] - loss: 1580.012 - mae: 413.283 - mean_q: 439.636 Interval 4371 (2185000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.5974 2 episodes - episode_reward: -1027.241 [-1107.935, -946.546] - loss: 1480.601 - mae: 397.850 - mean_q: 415.878 Interval 4372 (2185500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.8072 6 episodes - episode_reward: -709.448 [-1539.980, -117.646] - loss: 1560.607 - mae: 392.369 - mean_q: 408.651 Interval 4373 (2186000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.7386 6 episodes - episode_reward: -587.908 [-862.646, -180.584] - loss: 1712.505 - mae: 377.257 - mean_q: 385.847 Interval 4374 (2186500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.2052 9 episodes - episode_reward: -472.363 [-750.810, -130.403] - loss: 1712.320 - mae: 362.789 - mean_q: 363.668 Interval 4375 (2187000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2062 4 episodes - episode_reward: -540.764 [-993.491, -359.030] - loss: 1767.450 - mae: 371.855 - mean_q: 377.644 Interval 4376 (2187500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.7045 6 episodes - episode_reward: -472.687 [-897.172, -216.497] - loss: 1690.616 - mae: 370.151 - mean_q: 375.306 Interval 4377 (2188000 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: -4.0008 3 episodes - episode_reward: -667.286 [-833.990, -350.029] - loss: 1485.453 - mae: 365.002 - mean_q: 367.224 Interval 4378 (2188500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.3488 3 episodes - episode_reward: -730.261 [-857.260, -515.550] - loss: 1615.362 - mae: 366.947 - mean_q: 371.384 Interval 4379 (2189000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.2837 4 episodes - episode_reward: -662.008 [-897.294, -477.250] - loss: 1311.048 - mae: 363.231 - mean_q: 366.761 Interval 4380 (2189500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.1739 8 episodes - episode_reward: -487.889 [-927.510, -178.379] - loss: 1866.404 - mae: 364.061 - mean_q: 366.385 Interval 4381 (2190000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.5644 6 episodes - episode_reward: -518.372 [-1121.022, -108.621] - loss: 1308.952 - mae: 357.713 - mean_q: 358.828 Interval 4382 (2190500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.2182 8 episodes - episode_reward: -556.382 [-1012.405, -73.408] - loss: 1396.942 - mae: 360.769 - mean_q: 363.487 Interval 4383 (2191000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.9368 5 episodes - episode_reward: -573.006 [-896.393, -347.870] - loss: 1500.615 - mae: 350.616 - mean_q: 349.377 Interval 4384 (2191500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0108 5 episodes - episode_reward: -603.312 [-828.642, -245.138] - loss: 1370.171 - mae: 336.347 - mean_q: 326.405 Interval 4385 (2192000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.9976 4 episodes - episode_reward: -608.728 [-872.607, -416.576] - loss: 1251.774 - mae: 345.450 - mean_q: 339.629 Interval 4386 (2192500 steps performed) 
500/500 [==============================] - 3s 5ms/step - reward: -5.4291 5 episodes - episode_reward: -563.416 [-968.966, -246.377] - loss: 1263.036 - mae: 346.917 - mean_q: 342.675 Interval 4387 (2193000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2123 6 episodes - episode_reward: -429.534 [-536.689, -318.487] - loss: 1287.507 - mae: 354.927 - mean_q: 354.245 Interval 4388 (2193500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.1087 9 episodes - episode_reward: -348.393 [-722.026, -100.000] - loss: 1411.324 - mae: 358.626 - mean_q: 359.541 Interval 4389 (2194000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.7540 3 episodes - episode_reward: -845.902 [-1235.855, -637.950] - loss: 1151.694 - mae: 350.100 - mean_q: 347.817 Interval 4390 (2194500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4283 5 episodes - episode_reward: -289.957 [-484.776, -141.386] - loss: 1242.979 - mae: 349.050 - mean_q: 345.795 Interval 4391 (2195000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.1101 4 episodes - episode_reward: -432.320 [-571.949, -112.348] - loss: 1191.061 - mae: 352.512 - mean_q: 351.496 Interval 4392 (2195500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1942 2 episodes - episode_reward: -840.380 [-900.258, -780.503] - loss: 1125.722 - mae: 352.257 - mean_q: 352.121 Interval 4393 (2196000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4620 3 episodes - episode_reward: -685.706 [-1670.101, -180.546] - loss: 1066.971 - mae: 351.002 - mean_q: 350.307 Interval 4394 (2196500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0855 3 episodes - episode_reward: -682.077 [-817.600, -493.113] - loss: 1029.637 - mae: 351.978 - mean_q: 352.229 Interval 4395 (2197000 steps performed) 
500/500 [==============================] - 3s 5ms/step - reward: -5.5108 7 episodes - episode_reward: -396.699 [-617.032, -265.706] - loss: 1096.946 - mae: 346.236 - mean_q: 343.641 Interval 4396 (2197500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4572 4 episodes - episode_reward: -518.620 [-1133.316, -192.531] - loss: 907.539 - mae: 336.190 - mean_q: 327.773 Interval 4397 (2198000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.8583 3 episodes - episode_reward: -694.424 [-880.104, -504.289] - loss: 954.567 - mae: 343.328 - mean_q: 339.270 Interval 4398 (2198500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.9584 4 episodes - episode_reward: -620.088 [-805.463, -313.763] - loss: 923.834 - mae: 336.741 - mean_q: 329.679 Interval 4399 (2199000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.2331 8 episodes - episode_reward: -508.119 [-1011.661, -214.701] - loss: 940.952 - mae: 335.032 - mean_q: 327.315 Interval 4400 (2199500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2155 4 episodes - episode_reward: -538.643 [-1417.685, -112.085] - loss: 944.574 - mae: 336.016 - mean_q: 328.993 Interval 4401 (2200000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.0706 3 episodes - episode_reward: -675.745 [-1147.685, -397.779] - loss: 827.625 - mae: 327.553 - mean_q: 315.591 Interval 4402 (2200500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.1974 5 episodes - episode_reward: -892.412 [-1153.919, -561.937] - loss: 940.583 - mae: 329.009 - mean_q: 319.336 Interval 4403 (2201000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.6436 5 episodes - episode_reward: -793.127 [-1509.564, -462.724] - loss: 720.884 - mae: 326.375 - mean_q: 314.994 Interval 4404 (2201500 steps performed) 
500/500 [==============================] - 3s 5ms/step - reward: -7.0430 9 episodes - episode_reward: -384.232 [-595.582, -103.050] - loss: 830.707 - mae: 326.060 - mean_q: 315.478 Interval 4405 (2202000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.1642 5 episodes - episode_reward: -501.296 [-892.921, -271.260] - loss: 842.407 - mae: 325.301 - mean_q: 314.033 Interval 4406 (2202500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -9.6189 11 episodes - episode_reward: -429.531 [-809.727, -100.000] - loss: 839.688 - mae: 323.970 - mean_q: 312.965 Interval 4407 (2203000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.0390 7 episodes - episode_reward: -608.858 [-1043.745, -376.105] - loss: 756.645 - mae: 321.987 - mean_q: 309.159 Interval 4408 (2203500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.9122 5 episodes - episode_reward: -590.481 [-838.624, -442.122] - loss: 789.033 - mae: 317.252 - mean_q: 302.941 Interval 4409 (2204000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.3716 6 episodes - episode_reward: -426.974 [-616.689, -280.642] - loss: 725.792 - mae: 311.139 - mean_q: 294.403 Interval 4410 (2204500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.8394 7 episodes - episode_reward: -430.256 [-751.563, -108.732] - loss: 745.289 - mae: 309.460 - mean_q: 292.472 Interval 4411 (2205000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0485 5 episodes - episode_reward: -488.054 [-823.200, -237.141] - loss: 737.972 - mae: 305.839 - mean_q: 286.922 Interval 4412 (2205500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5349 4 episodes - episode_reward: -549.846 [-1175.292, -238.516] - loss: 672.754 - mae: 299.290 - mean_q: 277.394 Interval 4413 (2206000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -4.3400 4 episodes - episode_reward: -543.095 [-687.252, -405.770] - loss: 666.948 - mae: 301.006 - mean_q: 280.212 Interval 4414 (2206500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.0447 4 episodes - episode_reward: -550.847 [-797.721, -306.937] - loss: 651.729 - mae: 293.422 - mean_q: 269.263 Interval 4415 (2207000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1794 2 episodes - episode_reward: -745.298 [-904.557, -586.039] - loss: 652.497 - mae: 291.657 - mean_q: 267.666 Interval 4416 (2207500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.9096 2 episodes - episode_reward: -903.066 [-1441.293, -364.838] - loss: 674.475 - mae: 291.661 - mean_q: 267.393 Interval 4417 (2208000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -6.3941 3 episodes - episode_reward: -1308.452 [-2349.334, -393.239] - loss: 586.391 - mae: 291.473 - mean_q: 267.826 Interval 4418 (2208500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.9522 6 episodes - episode_reward: -497.031 [-803.014, -255.414] - loss: 592.100 - mae: 285.512 - mean_q: 258.691 Interval 4419 (2209000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0105 8 episodes - episode_reward: -311.523 [-582.980, -100.000] - loss: 577.943 - mae: 281.906 - mean_q: 253.847 Interval 4420 (2209500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.4938 8 episodes - episode_reward: -301.762 [-677.372, -100.000] - loss: 663.848 - mae: 279.285 - mean_q: 249.349 Interval 4421 (2210000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.0686 8 episodes - episode_reward: -416.750 [-1547.894, -100.000] - loss: 585.743 - mae: 279.896 - mean_q: 249.870 Interval 4422 (2210500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -4.7805 3 episodes - episode_reward: -769.967 [-1225.806, -232.493] - loss: 658.627 - mae: 276.929 - mean_q: 244.412 Interval 4423 (2211000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.0436 5 episodes - episode_reward: -530.503 [-1427.557, -71.295] - loss: 667.822 - mae: 273.821 - mean_q: 240.066 Interval 4424 (2211500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.1009 5 episodes - episode_reward: -415.336 [-859.744, -100.000] - loss: 789.169 - mae: 277.965 - mean_q: 247.116 Interval 4425 (2212000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0930 Interval 4426 (2212500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1665 Interval 4427 (2213000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2029 Interval 4428 (2213500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.5407 2 episodes - episode_reward: -925.281 [-1183.749, -666.814] - loss: 768.128 - mae: 308.221 - mean_q: 291.249 Interval 4429 (2214000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3335 5 episodes - episode_reward: -432.001 [-557.463, -226.697] - loss: 688.872 - mae: 305.929 - mean_q: 287.769 Interval 4430 (2214500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.9045 6 episodes - episode_reward: -411.075 [-624.319, -86.531] - loss: 738.260 - mae: 313.314 - mean_q: 298.257 Interval 4431 (2215000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.6662 7 episodes - episode_reward: -558.553 [-945.514, -127.720] - loss: 770.350 - mae: 321.101 - mean_q: 309.797 Interval 4432 (2215500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9459 7 episodes - episode_reward: -266.622 
[-608.865, -87.341] - loss: 740.255 - mae: 320.766 - mean_q: 308.084 Interval 4433 (2216000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0149 6 episodes - episode_reward: -398.820 [-620.444, -100.000] - loss: 659.778 - mae: 323.545 - mean_q: 311.642 Interval 4434 (2216500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.0151 6 episodes - episode_reward: -459.428 [-716.324, -238.516] - loss: 758.390 - mae: 314.005 - mean_q: 296.014 Interval 4435 (2217000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4517 5 episodes - episode_reward: -387.269 [-686.676, -219.888] - loss: 642.160 - mae: 305.468 - mean_q: 282.813 Interval 4436 (2217500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -6.7952 4 episodes - episode_reward: -889.645 [-2316.723, -351.546] - loss: 573.683 - mae: 302.808 - mean_q: 277.969 Interval 4437 (2218000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.8151 5 episodes - episode_reward: -491.448 [-995.384, -226.129] - loss: 570.804 - mae: 304.309 - mean_q: 279.489 Interval 4438 (2218500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.2370 6 episodes - episode_reward: -454.750 [-622.014, -138.696] - loss: 633.492 - mae: 304.321 - mean_q: 279.294 Interval 4439 (2219000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9961 2 episodes - episode_reward: -239.051 [-271.532, -206.569] - loss: 637.000 - mae: 310.283 - mean_q: 287.196 Interval 4440 (2219500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9729 4 episodes - episode_reward: -236.183 [-514.844, -100.000] - loss: 640.767 - mae: 309.973 - mean_q: 287.331 Interval 4441 (2220000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8087 1 episodes - episode_reward: -403.847 [-403.847, 
-403.847] - loss: 626.883 - mae: 315.213 - mean_q: 295.615 Interval 4442 (2220500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.8673 4 episodes - episode_reward: -591.475 [-787.916, -408.785] - loss: 620.479 - mae: 320.588 - mean_q: 304.145 Interval 4443 (2221000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7870 2 episodes - episode_reward: -508.589 [-742.875, -274.303] - loss: 611.774 - mae: 312.476 - mean_q: 292.313 Interval 4444 (2221500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1844 Interval 4445 (2222000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1885 Interval 4446 (2222500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1851 Interval 4447 (2223000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1706 Interval 4448 (2223500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1883 Interval 4449 (2224000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2906 Interval 4450 (2224500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.0727 Interval 4451 (2225000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1873 Interval 4452 (2225500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2456 Interval 4453 (2226000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.1194 Interval 4454 (2226500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.1549 5 episodes - episode_reward: -394.663 [-1178.517, -145.100] - loss: 693.420 - mae: 310.297 - mean_q: 290.173 Interval 4455 (2227000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0431 5 episodes - 
episode_reward: -208.167 [-272.710, -164.275] - loss: 686.481 - mae: 300.557 - mean_q: 275.078 Interval 4456 (2227500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0427 Interval 4457 (2228000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1330 Interval 4458 (2228500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2076 Interval 4459 (2229000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1793 Interval 4460 (2229500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2012 Interval 4461 (2230000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1761 Interval 4462 (2230500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1364 Interval 4463 (2231000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.4368 1 episodes - episode_reward: -761.849 [-761.849, -761.849] - loss: 856.184 - mae: 314.385 - mean_q: 297.576 Interval 4464 (2231500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6173 2 episodes - episode_reward: -145.589 [-155.095, -136.083] - loss: 825.711 - mae: 321.104 - mean_q: 308.693 Interval 4465 (2232000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3390 4 episodes - episode_reward: -170.807 [-283.305, -114.236] - loss: 932.723 - mae: 323.915 - mean_q: 313.357 Interval 4466 (2232500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2116 Interval 4467 (2233000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1314 Interval 4468 (2233500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3064 Interval 4469 (2234000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 
-0.1014 Interval 4470 (2234500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1170 Interval 4471 (2235000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1586 2 episodes - episode_reward: -499.444 [-771.892, -226.996] - loss: 1071.489 - mae: 359.795 - mean_q: 374.751 Interval 4472 (2235500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0693 Interval 4473 (2236000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3485 Interval 4474 (2236500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1056 Interval 4475 (2237000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2854 1 episodes - episode_reward: -414.787 [-414.787, -414.787] - loss: 894.595 - mae: 383.089 - mean_q: 415.834 Interval 4476 (2237500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1793 Interval 4477 (2238000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2348 Interval 4478 (2238500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1124 Interval 4479 (2239000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7320 1 episodes - episode_reward: -591.713 [-591.713, -591.713] - loss: 932.749 - mae: 389.610 - mean_q: 428.267 Interval 4480 (2239500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2444 Interval 4481 (2240000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1325 Interval 4482 (2240500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2991 Interval 4483 (2241000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.4719 Interval 4484 (2241500 steps performed) 500/500 [==============================] - 
3s 5ms/step - reward: -0.8016 3 episodes - episode_reward: -333.385 [-714.499, -100.000] - loss: 923.889 - mae: 397.098 - mean_q: 445.322 Interval 4485 (2242000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1715 Interval 4486 (2242500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2646 Interval 4487 (2243000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2359 Interval 4488 (2243500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1442 Interval 4489 (2244000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1256 Interval 4490 (2244500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2298 Interval 4491 (2245000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2024 Interval 4492 (2245500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2204 Interval 4493 (2246000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.1531 Interval 4494 (2246500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.0907 Interval 4495 (2247000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.4260 Interval 4496 (2247500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2308 3 episodes - episode_reward: -547.822 [-1268.172, -141.670] - loss: 736.631 - mae: 413.246 - mean_q: 483.841 Interval 4497 (2248000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2906 9 episodes - episode_reward: -182.090 [-239.839, -67.041] - loss: 914.021 - mae: 403.955 - mean_q: 470.173 Interval 4498 (2248500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7348 5 episodes - episode_reward: -194.749 [-276.229, 
-100.000] - loss: 828.273 - mae: 396.937 - mean_q: 461.576 Interval 4499 (2249000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1899 Interval 4500 (2249500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2036 Interval 4501 (2250000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3143 Interval 4502 (2250500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6335 4 episodes - episode_reward: -293.217 [-633.329, -106.757] - loss: 701.924 - mae: 386.899 - mean_q: 452.683 Interval 4503 (2251000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6432 2 episodes - episode_reward: -176.080 [-181.679, -170.481] - loss: 589.567 - mae: 384.724 - mean_q: 449.558 Interval 4504 (2251500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5007 3 episodes - episode_reward: -240.509 [-261.399, -230.001] - loss: 733.500 - mae: 382.459 - mean_q: 448.450 Interval 4505 (2252000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8941 2 episodes - episode_reward: -210.018 [-222.570, -197.466] - loss: 724.677 - mae: 380.462 - mean_q: 446.207 Interval 4506 (2252500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5107 1 episodes - episode_reward: -240.399 [-240.399, -240.399] - loss: 521.067 - mae: 372.800 - mean_q: 437.409 Interval 4507 (2253000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0101 5 episodes - episode_reward: -210.337 [-330.171, -80.165] - loss: 719.908 - mae: 369.502 - mean_q: 432.895 Interval 4508 (2253500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8444 2 episodes - episode_reward: -208.587 [-220.224, -196.950] - loss: 774.544 - mae: 370.095 - mean_q: 435.490 Interval 4509 (2254000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -1.9092 5 episodes - episode_reward: -189.816 [-214.918, -168.364] - loss: 604.668 - mae: 369.568 - mean_q: 436.501 Interval 4510 (2254500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0841 9 episodes - episode_reward: -171.461 [-223.359, -124.826] - loss: 508.641 - mae: 365.788 - mean_q: 431.137 Interval 4511 (2255000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2001 5 episodes - episode_reward: -211.989 [-283.165, -180.073] - loss: 591.229 - mae: 361.030 - mean_q: 425.546 Interval 4512 (2255500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5079 8 episodes - episode_reward: -157.887 [-247.764, -100.000] - loss: 576.558 - mae: 355.132 - mean_q: 417.839 Interval 4513 (2256000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2785 5 episodes - episode_reward: -222.110 [-305.441, -158.511] - loss: 582.752 - mae: 352.319 - mean_q: 413.381 Interval 4514 (2256500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2118 7 episodes - episode_reward: -161.208 [-271.658, -102.083] - loss: 563.633 - mae: 347.480 - mean_q: 405.620 Interval 4515 (2257000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2966 9 episodes - episode_reward: -187.159 [-272.451, -100.000] - loss: 539.617 - mae: 347.778 - mean_q: 407.563 Interval 4516 (2257500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5073 7 episodes - episode_reward: -176.319 [-257.545, -71.769] - loss: 510.948 - mae: 343.125 - mean_q: 400.778 Interval 4517 (2258000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9666 6 episodes - episode_reward: -162.922 [-268.794, -60.671] - loss: 473.184 - mae: 341.087 - mean_q: 399.348 Interval 4518 (2258500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.3214 8 episodes - episode_reward: -149.638 [-228.018, -1.702] - loss: 519.268 - mae: 340.789 - mean_q: 398.818 Interval 4519 (2259000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8161 4 episodes - episode_reward: -216.056 [-254.066, -169.257] - loss: 482.127 - mae: 343.577 - mean_q: 404.721 Interval 4520 (2259500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0271 7 episodes - episode_reward: -217.991 [-250.371, -161.849] - loss: 541.428 - mae: 343.611 - mean_q: 403.972 Interval 4521 (2260000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5637 7 episodes - episode_reward: -182.838 [-271.991, -51.416] - loss: 527.754 - mae: 340.688 - mean_q: 399.621 Interval 4522 (2260500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.9806 6 episodes - episode_reward: -86.283 [-162.211, -30.028] - loss: 503.072 - mae: 340.533 - mean_q: 398.900 Interval 4523 (2261000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0662 2 episodes - episode_reward: -258.985 [-298.026, -219.945] - loss: 546.545 - mae: 339.291 - mean_q: 397.852 Interval 4524 (2261500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8302 5 episodes - episode_reward: -168.310 [-228.012, -87.346] - loss: 537.241 - mae: 340.056 - mean_q: 401.107 Interval 4525 (2262000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1591 5 episodes - episode_reward: -220.994 [-339.507, -129.014] - loss: 556.603 - mae: 329.270 - mean_q: 383.839 Interval 4526 (2262500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7762 7 episodes - episode_reward: -207.031 [-260.909, -146.099] - loss: 584.849 - mae: 324.780 - mean_q: 377.197 Interval 4527 (2263000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.3097 8 episodes - episode_reward: -207.232 [-256.303, -163.097] - loss: 468.863 - mae: 318.256 - mean_q: 366.254 Interval 4528 (2263500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1749 6 episodes - episode_reward: -182.394 [-213.552, -130.864] - loss: 528.860 - mae: 317.802 - mean_q: 367.173 Interval 4529 (2264000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.0425 5 episodes - episode_reward: -303.081 [-405.897, -243.401] - loss: 525.583 - mae: 321.375 - mean_q: 371.889 Interval 4530 (2264500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -9.1308 9 episodes - episode_reward: -564.078 [-1100.736, -229.500] - loss: 451.524 - mae: 319.101 - mean_q: 368.756 Interval 4531 (2265000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8332 7 episodes - episode_reward: -347.815 [-453.766, -172.795] - loss: 510.737 - mae: 313.987 - mean_q: 361.004 Interval 4532 (2265500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4645 1 episodes - episode_reward: -566.106 [-566.106, -566.106] - loss: 484.688 - mae: 312.975 - mean_q: 361.093 Interval 4533 (2266000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.1447 6 episodes - episode_reward: -535.228 [-830.051, -326.916] - loss: 577.623 - mae: 311.927 - mean_q: 360.451 Interval 4534 (2266500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2038 5 episodes - episode_reward: -517.382 [-1169.939, -264.407] - loss: 492.254 - mae: 308.324 - mean_q: 357.398 Interval 4535 (2267000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2381 4 episodes - episode_reward: -535.851 [-853.307, -134.102] - loss: 476.181 - mae: 306.277 - mean_q: 356.163 Interval 4536 (2267500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -5.1924 4 episodes - episode_reward: -553.827 [-657.862, -344.849] - loss: 522.388 - mae: 309.630 - mean_q: 362.797 Interval 4537 (2268000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.5409 2 episodes - episode_reward: -1146.680 [-1416.327, -877.033] - loss: 433.169 - mae: 306.579 - mean_q: 356.442 Interval 4538 (2268500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.0389 9 episodes - episode_reward: -416.543 [-826.011, -106.964] - loss: 570.127 - mae: 309.039 - mean_q: 362.594 Interval 4539 (2269000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5391 7 episodes - episode_reward: -377.623 [-1120.372, -134.526] - loss: 545.587 - mae: 306.206 - mean_q: 355.856 Interval 4540 (2269500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.5128 8 episodes - episode_reward: -426.736 [-704.276, -130.767] - loss: 509.282 - mae: 300.913 - mean_q: 347.522 Interval 4541 (2270000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.6295 4 episodes - episode_reward: -604.031 [-946.986, -163.376] - loss: 455.414 - mae: 304.983 - mean_q: 354.832 Interval 4542 (2270500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8749 4 episodes - episode_reward: -596.125 [-893.924, -450.604] - loss: 495.113 - mae: 306.152 - mean_q: 354.379 Interval 4543 (2271000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.0708 5 episodes - episode_reward: -593.882 [-941.281, -106.003] - loss: 461.977 - mae: 306.251 - mean_q: 354.571 Interval 4544 (2271500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.3601 5 episodes - episode_reward: -575.006 [-983.579, -379.450] - loss: 555.076 - mae: 303.792 - mean_q: 349.521 Interval 4545 (2272000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -6.3979 6 episodes - episode_reward: -542.772 [-798.802, -275.097] - loss: 456.618 - mae: 302.377 - mean_q: 346.525 Interval 4546 (2272500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.1786 5 episodes - episode_reward: -629.872 [-1050.226, -319.028] - loss: 472.181 - mae: 297.857 - mean_q: 338.164 Interval 4547 (2273000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.2097 8 episodes - episode_reward: -541.653 [-1094.208, -259.614] - loss: 449.178 - mae: 303.305 - mean_q: 345.605 Interval 4548 (2273500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.2499 3 episodes - episode_reward: -619.478 [-1020.272, -229.741] - loss: 519.368 - mae: 301.503 - mean_q: 340.715 Interval 4549 (2274000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.7970 6 episodes - episode_reward: -598.418 [-861.071, -423.743] - loss: 533.418 - mae: 302.025 - mean_q: 341.238 Interval 4550 (2274500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.2954 7 episodes - episode_reward: -537.118 [-763.913, -100.000] - loss: 543.556 - mae: 302.340 - mean_q: 339.708 Interval 4551 (2275000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5711 2 episodes - episode_reward: -594.245 [-899.886, -288.604] - loss: 645.703 - mae: 306.812 - mean_q: 344.149 Interval 4552 (2275500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.2615 5 episodes - episode_reward: -714.744 [-977.620, -372.621] - loss: 553.410 - mae: 303.347 - mean_q: 337.405 Interval 4553 (2276000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.3787 8 episodes - episode_reward: -480.073 [-771.419, -137.004] - loss: 619.256 - mae: 307.813 - mean_q: 344.203 Interval 4554 (2276500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -4.1002 4 episodes - episode_reward: -510.153 [-893.947, -100.000] - loss: 503.066 - mae: 311.497 - mean_q: 347.758 Interval 4555 (2277000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8392 5 episodes - episode_reward: -500.518 [-850.834, -288.406] - loss: 478.308 - mae: 314.117 - mean_q: 351.686 Interval 4556 (2277500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0565 2 episodes - episode_reward: -758.751 [-896.398, -621.105] - loss: 502.860 - mae: 319.294 - mean_q: 356.781 Interval 4557 (2278000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1636 Interval 4558 (2278500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1615 Interval 4559 (2279000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2896 1 episodes - episode_reward: -740.943 [-740.943, -740.943] - loss: 518.097 - mae: 330.060 - mean_q: 368.939 Interval 4560 (2279500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3487 2 episodes - episode_reward: -353.126 [-542.159, -164.094] - loss: 504.502 - mae: 337.151 - mean_q: 379.648 Interval 4561 (2280000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1759 Interval 4562 (2280500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4471 1 episodes - episode_reward: -822.912 [-822.912, -822.912] - loss: 539.712 - mae: 336.712 - mean_q: 376.646 Interval 4563 (2281000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8876 Interval 4564 (2281500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3158 2 episodes - episode_reward: -565.946 [-607.208, -524.684] - loss: 516.422 - mae: 325.213 - mean_q: 358.753 Interval 4565 (2282000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.6186 Interval 4566 (2282500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0422 4 episodes - episode_reward: -445.785 [-868.742, -126.164] - loss: 501.657 - mae: 321.991 - mean_q: 352.069 Interval 4567 (2283000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1529 Interval 4568 (2283500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1074 Interval 4569 (2284000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1620 Interval 4570 (2284500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1938 Interval 4571 (2285000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1634 Interval 4572 (2285500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -5.0308 4 episodes - episode_reward: -706.619 [-1147.825, -100.000] - loss: 454.078 - mae: 306.109 - mean_q: 328.102 Interval 4573 (2286000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1452 4 episodes - episode_reward: -669.274 [-1428.801, -153.417] - loss: 469.948 - mae: 292.375 - mean_q: 306.430 Interval 4574 (2286500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1884 Interval 4575 (2287000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4612 Interval 4576 (2287500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0312 4 episodes - episode_reward: -579.674 [-737.363, -484.543] - loss: 511.412 - mae: 275.847 - mean_q: 282.278 Interval 4577 (2288000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7874 2 episodes - episode_reward: -356.784 [-613.568, -100.000] - loss: 480.684 - mae: 275.277 - mean_q: 282.202 Interval 4578 (2288500 steps 
performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3585 4 episodes - episode_reward: -538.626 [-929.684, -165.977] - loss: 453.867 - mae: 266.212 - mean_q: 267.696 Interval 4579 (2289000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8805 4 episodes - episode_reward: -498.712 [-617.044, -415.949] - loss: 488.334 - mae: 257.187 - mean_q: 253.621 Interval 4580 (2289500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2701 6 episodes - episode_reward: -406.978 [-600.183, -100.000] - loss: 475.285 - mae: 249.103 - mean_q: 240.243 Interval 4581 (2290000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9246 4 episodes - episode_reward: -551.589 [-819.426, -85.639] - loss: 490.898 - mae: 240.524 - mean_q: 227.200 Interval 4582 (2290500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8386 3 episodes - episode_reward: -475.522 [-568.381, -349.116] - loss: 448.464 - mae: 232.700 - mean_q: 215.086 Interval 4583 (2291000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8417 2 episodes - episode_reward: -822.994 [-1103.830, -542.157] - loss: 390.794 - mae: 228.498 - mean_q: 208.183 Interval 4584 (2291500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6490 Interval 4585 (2292000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.8923 4 episodes - episode_reward: -548.909 [-619.900, -471.538] - loss: 422.396 - mae: 218.405 - mean_q: 192.076 Interval 4586 (2292500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.8700 6 episodes - episode_reward: -478.579 [-632.073, -207.842] - loss: 403.305 - mae: 212.670 - mean_q: 183.847 Interval 4587 (2293000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7154 6 episodes - episode_reward: 
-382.639 [-526.563, -177.071] - loss: 374.425 - mae: 207.378 - mean_q: 173.593 Interval 4588 (2293500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9009 5 episodes - episode_reward: -404.283 [-810.779, -174.689] - loss: 367.888 - mae: 199.848 - mean_q: 160.688 Interval 4589 (2294000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1716 7 episodes - episode_reward: -348.938 [-1003.867, -102.434] - loss: 408.138 - mae: 194.334 - mean_q: 152.199 Interval 4590 (2294500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2549 4 episodes - episode_reward: -423.437 [-595.165, -291.952] - loss: 361.284 - mae: 190.359 - mean_q: 145.617 Interval 4591 (2295000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2917 5 episodes - episode_reward: -334.338 [-508.002, -239.235] - loss: 420.293 - mae: 187.110 - mean_q: 137.953 Interval 4592 (2295500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1001 3 episodes - episode_reward: -456.587 [-526.256, -375.515] - loss: 336.182 - mae: 181.020 - mean_q: 127.232 Interval 4593 (2296000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3878 4 episodes - episode_reward: -369.126 [-565.451, -134.505] - loss: 407.603 - mae: 179.805 - mean_q: 126.091 Interval 4594 (2296500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2744 Interval 4595 (2297000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.1453 8 episodes - episode_reward: -392.196 [-611.510, -100.000] - loss: 362.635 - mae: 174.411 - mean_q: 115.205 Interval 4596 (2297500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2131 6 episodes - episode_reward: -383.434 [-665.593, -100.000] - loss: 320.937 - mae: 169.622 - mean_q: 108.722 Interval 4597 (2298000 steps 
performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8775 4 episodes - episode_reward: -437.714 [-628.371, -309.770] - loss: 372.278 - mae: 168.148 - mean_q: 105.791 Interval 4598 (2298500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5012 6 episodes - episode_reward: -454.875 [-820.475, -125.018] - loss: 313.313 - mae: 162.328 - mean_q: 96.656 Interval 4599 (2299000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3035 5 episodes - episode_reward: -515.873 [-753.316, -153.382] - loss: 299.769 - mae: 157.995 - mean_q: 89.146 Interval 4600 (2299500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3337 3 episodes - episode_reward: -612.724 [-722.210, -515.840] - loss: 326.211 - mae: 152.724 - mean_q: 78.171 Interval 4601 (2300000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3912 6 episodes - episode_reward: -488.037 [-628.379, -338.123] - loss: 286.644 - mae: 150.409 - mean_q: 75.111 Interval 4602 (2300500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.5207 6 episodes - episode_reward: -571.776 [-1038.331, -375.621] - loss: 346.359 - mae: 149.072 - mean_q: 73.026 Interval 4603 (2301000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7493 3 episodes - episode_reward: -635.654 [-1304.158, -270.844] - loss: 263.902 - mae: 146.582 - mean_q: 69.304 Interval 4604 (2301500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5553 5 episodes - episode_reward: -449.057 [-646.463, -236.236] - loss: 306.009 - mae: 144.166 - mean_q: 63.684 Interval 4605 (2302000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.7263 5 episodes - episode_reward: -480.472 [-624.661, -214.107] - loss: 321.485 - mae: 144.013 - mean_q: 64.281 Interval 4606 (2302500 steps performed) 
500/500 [==============================] - 3s 5ms/step - reward: -1.0168 3 episodes - episode_reward: -149.247 [-166.385, -122.525] - loss: 321.723 - mae: 146.077 - mean_q: 66.243 Interval 4607 (2303000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4428 9 episodes - episode_reward: -195.294 [-269.876, -145.827] - loss: 278.983 - mae: 146.668 - mean_q: 67.231 Interval 4608 (2303500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2630 7 episodes - episode_reward: -154.969 [-196.191, -110.669] - loss: 300.334 - mae: 147.565 - mean_q: 69.150 Interval 4609 (2304000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1615 8 episodes - episode_reward: -196.727 [-298.838, 1.271] - loss: 254.277 - mae: 145.850 - mean_q: 65.488 Interval 4610 (2304500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6480 4 episodes - episode_reward: -217.325 [-391.385, -115.040] - loss: 280.937 - mae: 146.558 - mean_q: 67.307 Interval 4611 (2305000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7597 7 episodes - episode_reward: -182.478 [-348.213, -121.409] - loss: 288.156 - mae: 147.491 - mean_q: 70.086 Interval 4612 (2305500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2054 9 episodes - episode_reward: -190.051 [-253.515, -109.234] - loss: 268.950 - mae: 144.284 - mean_q: 64.370 Interval 4613 (2306000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9862 8 episodes - episode_reward: -183.675 [-271.171, -100.000] - loss: 263.980 - mae: 141.497 - mean_q: 59.966 Interval 4614 (2306500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0681 5 episodes - episode_reward: -204.327 [-263.231, -92.483] - loss: 234.313 - mae: 140.064 - mean_q: 57.331 Interval 4615 (2307000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.4543 7 episodes - episode_reward: -177.139 [-226.308, -143.604] - loss: 248.298 - mae: 138.363 - mean_q: 56.359 Interval 4616 (2307500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8520 7 episodes - episode_reward: -199.519 [-378.052, -140.739] - loss: 195.022 - mae: 137.434 - mean_q: 54.109 Interval 4617 (2308000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9020 6 episodes - episode_reward: -166.832 [-231.795, -71.631] - loss: 220.605 - mae: 136.240 - mean_q: 52.778 Interval 4618 (2308500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7798 7 episodes - episode_reward: -198.660 [-232.124, -140.675] - loss: 215.378 - mae: 133.340 - mean_q: 47.460 Interval 4619 (2309000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4623 7 episodes - episode_reward: -162.391 [-257.709, -40.899] - loss: 220.066 - mae: 133.463 - mean_q: 49.018 Interval 4620 (2309500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6737 11 episodes - episode_reward: -177.180 [-236.729, -100.000] - loss: 170.763 - mae: 131.671 - mean_q: 45.067 Interval 4621 (2310000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3062 8 episodes - episode_reward: -207.162 [-385.794, -113.834] - loss: 213.754 - mae: 134.220 - mean_q: 49.960 Interval 4622 (2310500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1796 6 episodes - episode_reward: -170.398 [-257.295, -100.208] - loss: 203.798 - mae: 132.123 - mean_q: 44.945 Interval 4623 (2311000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7586 9 episodes - episode_reward: -152.013 [-200.783, -31.903] - loss: 201.180 - mae: 129.886 - mean_q: 40.342 Interval 4624 (2311500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.1464 6 episodes - episode_reward: -174.237 [-206.925, -129.201] - loss: 196.584 - mae: 128.658 - mean_q: 39.700 Interval 4625 (2312000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9955 7 episodes - episode_reward: -223.774 [-267.507, -171.432] - loss: 183.302 - mae: 126.847 - mean_q: 36.649 Interval 4626 (2312500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0366 8 episodes - episode_reward: -195.854 [-248.895, -100.000] - loss: 200.615 - mae: 124.310 - mean_q: 31.482 Interval 4627 (2313000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9565 8 episodes - episode_reward: -168.325 [-233.130, -124.081] - loss: 185.354 - mae: 125.769 - mean_q: 36.628 Interval 4628 (2313500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0313 8 episodes - episode_reward: -202.083 [-289.053, -157.199] - loss: 154.369 - mae: 125.422 - mean_q: 34.359 Interval 4629 (2314000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0540 8 episodes - episode_reward: -194.655 [-218.817, -150.907] - loss: 181.980 - mae: 124.665 - mean_q: 34.935 Interval 4630 (2314500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6129 5 episodes - episode_reward: -155.539 [-177.448, -133.743] - loss: 193.789 - mae: 122.865 - mean_q: 30.199 Interval 4631 (2315000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4600 7 episodes - episode_reward: -173.671 [-213.549, -134.151] - loss: 171.986 - mae: 121.905 - mean_q: 28.663 Interval 4632 (2315500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8839 7 episodes - episode_reward: -205.952 [-255.891, -163.766] - loss: 173.650 - mae: 121.818 - mean_q: 29.040 Interval 4633 (2316000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.0527 6 episodes - episode_reward: -210.330 [-455.443, -100.000] - loss: 162.593 - mae: 121.926 - mean_q: 30.545 Interval 4634 (2316500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.9995 7 episodes - episode_reward: -575.528 [-1026.696, -394.586] - loss: 145.803 - mae: 120.024 - mean_q: 26.795 Interval 4635 (2317000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.4230 8 episodes - episode_reward: -421.392 [-961.000, -143.435] - loss: 180.332 - mae: 120.356 - mean_q: 27.865 Interval 4636 (2317500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9746 4 episodes - episode_reward: -364.417 [-829.308, -99.396] - loss: 180.796 - mae: 119.339 - mean_q: 24.788 Interval 4637 (2318000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.1131 7 episodes - episode_reward: -513.851 [-1050.222, -157.236] - loss: 141.780 - mae: 119.351 - mean_q: 24.971 Interval 4638 (2318500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.7566 7 episodes - episode_reward: -488.075 [-877.324, -157.732] - loss: 145.630 - mae: 120.120 - mean_q: 26.634 Interval 4639 (2319000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.4540 8 episodes - episode_reward: -475.171 [-711.222, -248.022] - loss: 157.118 - mae: 120.507 - mean_q: 27.551 Interval 4640 (2319500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.0995 8 episodes - episode_reward: -503.926 [-816.321, -296.962] - loss: 166.689 - mae: 120.431 - mean_q: 27.819 Interval 4641 (2320000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0465 5 episodes - episode_reward: -454.386 [-878.031, -174.851] - loss: 175.486 - mae: 121.907 - mean_q: 29.758 Interval 4642 (2320500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.1581 6 episodes - episode_reward: -216.332 [-517.713, 5.224] - loss: 167.453 - mae: 123.532 - mean_q: 32.374 Interval 4643 (2321000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3183 4 episodes - episode_reward: -184.865 [-236.189, -125.627] - loss: 161.294 - mae: 123.953 - mean_q: 32.729 Interval 4644 (2321500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0236 5 episodes - episode_reward: -171.376 [-252.037, -90.637] - loss: 176.493 - mae: 125.565 - mean_q: 35.747 Interval 4645 (2322000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.4271 4 episodes - episode_reward: -192.704 [-247.240, -146.386] - loss: 165.859 - mae: 125.217 - mean_q: 36.715 Interval 4646 (2322500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3905 8 episodes - episode_reward: -218.126 [-311.361, -116.769] - loss: 136.866 - mae: 123.653 - mean_q: 32.609 Interval 4647 (2323000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6595 9 episodes - episode_reward: -141.912 [-197.964, -9.104] - loss: 149.558 - mae: 124.002 - mean_q: 33.104 Interval 4648 (2323500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9596 6 episodes - episode_reward: -173.849 [-272.866, -124.536] - loss: 136.860 - mae: 124.439 - mean_q: 35.272 Interval 4649 (2324000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6017 7 episodes - episode_reward: -182.470 [-292.013, -80.003] - loss: 140.442 - mae: 123.433 - mean_q: 33.309 Interval 4650 (2324500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3503 7 episodes - episode_reward: -164.901 [-275.750, -110.552] - loss: 138.559 - mae: 122.992 - mean_q: 32.424 Interval 4651 (2325000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.0555 8 episodes - episode_reward: -191.334 [-318.372, -132.529] - loss: 142.197 - mae: 125.607 - mean_q: 35.474 Interval 4652 (2325500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4945 7 episodes - episode_reward: -171.361 [-236.388, -48.507] - loss: 152.533 - mae: 126.131 - mean_q: 35.638 Interval 4653 (2326000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2127 6 episodes - episode_reward: -194.666 [-238.871, -145.919] - loss: 142.533 - mae: 125.584 - mean_q: 33.634 Interval 4654 (2326500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8547 7 episodes - episode_reward: -198.425 [-275.238, -100.000] - loss: 138.260 - mae: 125.548 - mean_q: 32.438 Interval 4655 (2327000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9104 6 episodes - episode_reward: -168.024 [-237.556, -119.553] - loss: 145.156 - mae: 125.122 - mean_q: 30.781 Interval 4656 (2327500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9558 7 episodes - episode_reward: -203.497 [-364.551, -127.696] - loss: 162.829 - mae: 126.158 - mean_q: 32.577 Interval 4657 (2328000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0023 7 episodes - episode_reward: -214.983 [-421.743, -100.000] - loss: 170.904 - mae: 127.086 - mean_q: 32.174 Interval 4658 (2328500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4740 7 episodes - episode_reward: -168.089 [-221.080, -138.804] - loss: 180.664 - mae: 126.647 - mean_q: 28.282 Interval 4659 (2329000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6072 8 episodes - episode_reward: -167.700 [-263.727, -11.509] - loss: 224.290 - mae: 127.845 - mean_q: 29.295 Interval 4660 (2329500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.6205 8 episodes - episode_reward: -168.115 [-302.463, -18.144] - loss: 292.798 - mae: 131.693 - mean_q: 33.079 Interval 4661 (2330000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9437 8 episodes - episode_reward: -182.021 [-250.079, -127.362] - loss: 324.302 - mae: 134.696 - mean_q: 35.232 Interval 4662 (2330500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0330 8 episodes - episode_reward: -193.142 [-256.545, -162.031] - loss: 413.794 - mae: 143.446 - mean_q: 46.167 Interval 4663 (2331000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4670 9 episodes - episode_reward: -195.042 [-294.215, -126.118] - loss: 499.947 - mae: 154.495 - mean_q: 60.175 Interval 4664 (2331500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5383 7 episodes - episode_reward: -181.131 [-285.354, -126.415] - loss: 588.681 - mae: 162.758 - mean_q: 69.421 Interval 4665 (2332000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7790 6 episodes - episode_reward: -217.102 [-336.915, -132.639] - loss: 707.047 - mae: 176.372 - mean_q: 87.806 Interval 4666 (2332500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.9936 9 episodes - episode_reward: -387.812 [-665.817, -171.571] - loss: 972.306 - mae: 188.767 - mean_q: 102.340 Interval 4667 (2333000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.4764 8 episodes - episode_reward: -541.576 [-1164.411, -239.007] - loss: 1335.033 - mae: 199.068 - mean_q: 114.645 Interval 4668 (2333500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.7264 9 episodes - episode_reward: -428.141 [-753.911, -100.000] - loss: 1508.315 - mae: 213.333 - mean_q: 131.906 Interval 4669 (2334000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -7.1655 7 episodes - episode_reward: -454.170 [-599.897, -346.871] - loss: 1760.714 - mae: 230.696 - mean_q: 156.044 Interval 4670 (2334500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.2908 7 episodes - episode_reward: -631.430 [-974.479, -428.136] - loss: 2588.351 - mae: 252.818 - mean_q: 182.532 Interval 4671 (2335000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -9.1742 7 episodes - episode_reward: -662.309 [-975.098, -461.360] - loss: 3093.285 - mae: 274.759 - mean_q: 209.250 Interval 4672 (2335500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.4824 7 episodes - episode_reward: -617.238 [-972.220, -311.118] - loss: 3153.194 - mae: 293.003 - mean_q: 231.664 Interval 4673 (2336000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.3510 9 episodes - episode_reward: -423.695 [-622.187, -165.627] - loss: 3623.790 - mae: 300.744 - mean_q: 240.870 Interval 4674 (2336500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.4646 9 episodes - episode_reward: -501.580 [-833.319, -104.040] - loss: 3795.098 - mae: 308.782 - mean_q: 249.818 Interval 4675 (2337000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.0552 9 episodes - episode_reward: -451.283 [-789.559, -100.000] - loss: 4376.597 - mae: 331.651 - mean_q: 280.729 Interval 4676 (2337500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.5903 7 episodes - episode_reward: -509.752 [-788.897, -100.000] - loss: 6190.733 - mae: 333.829 - mean_q: 280.284 Interval 4677 (2338000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.4252 8 episodes - episode_reward: -558.544 [-791.080, -410.048] - loss: 5471.217 - mae: 320.853 - mean_q: 259.409 Interval 4678 (2338500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -7.3452 7 episodes - episode_reward: -488.419 [-901.870, -111.259] - loss: 5351.500 - mae: 324.151 - mean_q: 262.687 Interval 4679 (2339000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.2144 8 episodes - episode_reward: -549.458 [-859.801, -100.000] - loss: 4832.814 - mae: 325.394 - mean_q: 264.238 Interval 4680 (2339500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.1543 7 episodes - episode_reward: -542.689 [-1030.030, -100.000] - loss: 5248.937 - mae: 327.360 - mean_q: 268.109 Interval 4681 (2340000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.5398 9 episodes - episode_reward: -478.663 [-894.793, -90.206] - loss: 4834.081 - mae: 302.965 - mean_q: 234.011 Interval 4682 (2340500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.4348 7 episodes - episode_reward: -554.100 [-1088.603, -390.695] - loss: 4082.542 - mae: 297.128 - mean_q: 226.300 Interval 4683 (2341000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.0586 7 episodes - episode_reward: -588.893 [-1083.861, -136.814] - loss: 3746.169 - mae: 279.878 - mean_q: 201.815 Interval 4684 (2341500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8981 5 episodes - episode_reward: -383.036 [-529.178, -329.630] - loss: 3346.582 - mae: 279.027 - mean_q: 202.486 Interval 4685 (2342000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.4628 5 episodes - episode_reward: -831.796 [-1519.586, -313.577] - loss: 3787.433 - mae: 277.884 - mean_q: 200.403 Interval 4686 (2342500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.0364 4 episodes - episode_reward: -594.693 [-1142.050, -306.962] - loss: 3322.044 - mae: 264.965 - mean_q: 183.028 Interval 4687 (2343000 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -6.8025 7 episodes - episode_reward: -581.355 [-1350.036, -213.760] - loss: 4274.391 - mae: 267.732 - mean_q: 187.508 Interval 4688 (2343500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0054 5 episodes - episode_reward: -513.331 [-933.871, -181.324] - loss: 3559.677 - mae: 266.550 - mean_q: 186.680 Interval 4689 (2344000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3316 3 episodes - episode_reward: -620.455 [-940.672, -235.370] - loss: 3529.960 - mae: 253.631 - mean_q: 168.160 Interval 4690 (2344500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.4660 6 episodes - episode_reward: -497.641 [-926.188, -126.093] - loss: 3774.940 - mae: 249.792 - mean_q: 163.098 Interval 4691 (2345000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5550 1 episodes - episode_reward: -2004.706 [-2004.706, -2004.706] - loss: 3125.037 - mae: 246.115 - mean_q: 157.797 Interval 4692 (2345500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1150 3 episodes - episode_reward: -892.327 [-1802.106, -318.334] - loss: 2705.705 - mae: 241.142 - mean_q: 150.067 Interval 4693 (2346000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.3237 4 episodes - episode_reward: -804.961 [-1136.682, -662.336] - loss: 2546.794 - mae: 228.777 - mean_q: 133.143 Interval 4694 (2346500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.3461 6 episodes - episode_reward: -539.721 [-1035.637, -194.110] - loss: 2522.283 - mae: 218.807 - mean_q: 117.638 Interval 4695 (2347000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.9250 3 episodes - episode_reward: -938.432 [-1555.015, -496.183] - loss: 2472.536 - mae: 218.956 - mean_q: 117.135 Interval 4696 (2347500 steps 
performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6844 4 episodes - episode_reward: -582.030 [-977.142, -248.518] - loss: 2494.095 - mae: 213.410 - mean_q: 109.405 Interval 4697 (2348000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0342 5 episodes - episode_reward: -552.135 [-906.717, -328.797] - loss: 2216.885 - mae: 210.023 - mean_q: 105.749 Interval 4698 (2348500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2977 8 episodes - episode_reward: -328.150 [-504.757, -133.267] - loss: 2232.012 - mae: 218.926 - mean_q: 117.759 Interval 4699 (2349000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.4891 6 episodes - episode_reward: -543.305 [-897.957, -100.000] - loss: 2264.093 - mae: 218.237 - mean_q: 115.415 Interval 4700 (2349500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8834 2 episodes - episode_reward: -958.355 [-964.328, -952.381] - loss: 2081.407 - mae: 207.722 - mean_q: 99.947 Interval 4701 (2350000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8869 3 episodes - episode_reward: -606.766 [-710.814, -420.923] - loss: 2121.085 - mae: 219.214 - mean_q: 117.814 Interval 4702 (2350500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.5215 6 episodes - episode_reward: -646.467 [-1270.761, -190.925] - loss: 2734.716 - mae: 216.236 - mean_q: 113.225 Interval 4703 (2351000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0390 2 episodes - episode_reward: -706.166 [-1028.789, -383.543] - loss: 2071.762 - mae: 207.527 - mean_q: 99.446 Interval 4704 (2351500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.1360 4 episodes - episode_reward: -622.210 [-1199.781, -273.001] - loss: 1965.019 - mae: 207.004 - mean_q: 99.059 Interval 4705 (2352000 steps 
performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7917 6 episodes - episode_reward: -435.840 [-729.351, -100.000] - loss: 1960.415 - mae: 196.312 - mean_q: 84.213 Interval 4706 (2352500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4186 5 episodes - episode_reward: -451.673 [-756.704, -162.052] - loss: 2046.564 - mae: 199.506 - mean_q: 87.763 Interval 4707 (2353000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.1616 3 episodes - episode_reward: -544.040 [-647.245, -459.304] - loss: 2432.148 - mae: 198.136 - mean_q: 85.890 Interval 4708 (2353500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2809 6 episodes - episode_reward: -425.895 [-858.242, -158.940] - loss: 2543.900 - mae: 208.637 - mean_q: 101.796 Interval 4709 (2354000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4830 1 episodes - episode_reward: -1135.793 [-1135.793, -1135.793] - loss: 2768.965 - mae: 200.331 - mean_q: 90.879 Interval 4710 (2354500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4914 6 episodes - episode_reward: -439.263 [-1000.026, -108.842] - loss: 2517.412 - mae: 204.676 - mean_q: 97.037 Interval 4711 (2355000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.6592 6 episodes - episode_reward: -506.788 [-995.192, -111.881] - loss: 2589.099 - mae: 206.948 - mean_q: 100.393 Interval 4712 (2355500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.1756 5 episodes - episode_reward: -504.665 [-748.761, -208.906] - loss: 2696.044 - mae: 208.443 - mean_q: 103.306 Interval 4713 (2356000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3574 4 episodes - episode_reward: -560.172 [-1075.503, -117.898] - loss: 3037.943 - mae: 222.783 - mean_q: 123.837 Interval 4714 (2356500 steps 
performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.3283 3 episodes - episode_reward: -643.420 [-1003.044, -455.690] - loss: 2627.934 - mae: 212.979 - mean_q: 109.705 Interval 4715 (2357000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0608 3 episodes - episode_reward: -626.311 [-684.903, -556.934] - loss: 2602.912 - mae: 225.412 - mean_q: 128.156 Interval 4716 (2357500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6635 6 episodes - episode_reward: -700.131 [-2441.984, -100.000] - loss: 2471.415 - mae: 218.097 - mean_q: 118.528 Interval 4717 (2358000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5283 4 episodes - episode_reward: -552.803 [-1014.076, -203.590] - loss: 2730.203 - mae: 225.282 - mean_q: 129.629 Interval 4718 (2358500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9765 3 episodes - episode_reward: -631.926 [-885.723, -386.497] - loss: 2638.879 - mae: 221.028 - mean_q: 122.099 Interval 4719 (2359000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4509 6 episodes - episode_reward: -304.574 [-497.767, -120.583] - loss: 2585.619 - mae: 230.720 - mean_q: 138.491 Interval 4720 (2359500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4108 5 episodes - episode_reward: -324.505 [-646.308, -100.000] - loss: 3109.796 - mae: 237.633 - mean_q: 148.410 Interval 4721 (2360000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5682 1 episodes - episode_reward: -1339.888 [-1339.888, -1339.888] - loss: 2594.692 - mae: 240.526 - mean_q: 153.843 Interval 4722 (2360500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.8573 3 episodes - episode_reward: -670.823 [-1003.388, -474.654] - loss: 2511.159 - mae: 242.408 - mean_q: 155.856 Interval 4723 (2361000 
steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3050 6 episodes - episode_reward: -188.533 [-241.269, -112.442] - loss: 2680.428 - mae: 243.126 - mean_q: 160.002 Interval 4724 (2361500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9634 8 episodes - episode_reward: -182.258 [-233.216, -114.595] - loss: 2506.006 - mae: 246.660 - mean_q: 164.661 Interval 4725 (2362000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6531 7 episodes - episode_reward: -192.174 [-275.627, -105.119] - loss: 2865.068 - mae: 256.925 - mean_q: 178.943 Interval 4726 (2362500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8630 5 episodes - episode_reward: -183.931 [-239.254, -100.725] - loss: 2751.104 - mae: 265.620 - mean_q: 190.298 Interval 4727 (2363000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6432 5 episodes - episode_reward: -167.262 [-227.217, -100.711] - loss: 2666.882 - mae: 261.531 - mean_q: 183.463 Interval 4728 (2363500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1760 6 episodes - episode_reward: -181.866 [-251.208, -125.112] - loss: 2759.530 - mae: 272.391 - mean_q: 201.005 Interval 4729 (2364000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8389 8 episodes - episode_reward: -177.846 [-220.837, -100.000] - loss: 2565.233 - mae: 271.008 - mean_q: 199.586 Interval 4730 (2364500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.6319 6 episodes - episode_reward: -550.785 [-992.714, -100.000] - loss: 2878.697 - mae: 286.306 - mean_q: 221.167 Interval 4731 (2365000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.8213 6 episodes - episode_reward: -560.332 [-1030.559, -376.967] - loss: 2773.573 - mae: 284.259 - mean_q: 220.105 Interval 4732 (2365500 
steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.0653 5 episodes - episode_reward: -556.656 [-950.411, -167.875] - loss: 2820.117 - mae: 284.009 - mean_q: 219.229 Interval 4733 (2366000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.1196 6 episodes - episode_reward: -650.827 [-1078.426, -323.044] - loss: 2891.531 - mae: 283.888 - mean_q: 219.487 Interval 4734 (2366500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2332 5 episodes - episode_reward: -412.467 [-876.836, -89.618] - loss: 2853.587 - mae: 293.751 - mean_q: 234.522 Interval 4735 (2367000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.1313 6 episodes - episode_reward: -445.492 [-706.413, -199.554] - loss: 2906.647 - mae: 282.970 - mean_q: 218.157 Interval 4736 (2367500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.8261 7 episodes - episode_reward: -542.645 [-1197.812, -100.000] - loss: 2672.095 - mae: 281.577 - mean_q: 214.327 Interval 4737 (2368000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.5323 7 episodes - episode_reward: -467.119 [-897.034, -88.684] - loss: 2916.965 - mae: 275.527 - mean_q: 206.272 Interval 4738 (2368500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.8946 7 episodes - episode_reward: -522.944 [-1125.057, -125.871] - loss: 3226.565 - mae: 289.633 - mean_q: 227.688 Interval 4739 (2369000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.3629 7 episodes - episode_reward: -552.957 [-933.596, -100.000] - loss: 2876.839 - mae: 282.584 - mean_q: 217.720 Interval 4740 (2369500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.5549 7 episodes - episode_reward: -557.751 [-1070.975, -221.701] - loss: 2721.364 - mae: 286.978 - mean_q: 222.097 Interval 4741 (2370000 
steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.8440 6 episodes - episode_reward: -474.198 [-1129.880, -92.040] - loss: 2772.879 - mae: 282.707 - mean_q: 215.346 Interval 4742 (2370500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9306 3 episodes - episode_reward: -664.092 [-982.505, -178.325] - loss: 2683.769 - mae: 294.814 - mean_q: 233.922 Interval 4743 (2371000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5830 5 episodes - episode_reward: -454.534 [-986.973, -111.028] - loss: 2898.093 - mae: 278.492 - mean_q: 209.918 Interval 4744 (2371500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.1377 7 episodes - episode_reward: -449.284 [-1094.868, -89.776] - loss: 2492.786 - mae: 270.863 - mean_q: 196.753 Interval 4745 (2372000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1703 2 episodes - episode_reward: -510.221 [-866.169, -154.272] - loss: 2984.591 - mae: 288.555 - mean_q: 221.968 Interval 4746 (2372500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0072 3 episodes - episode_reward: -688.872 [-844.686, -510.921] - loss: 2890.132 - mae: 295.274 - mean_q: 233.031 Interval 4747 (2373000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.1481 5 episodes - episode_reward: -613.930 [-945.135, -290.406] - loss: 2547.369 - mae: 288.943 - mean_q: 224.827 Interval 4748 (2373500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.1071 5 episodes - episode_reward: -508.455 [-645.077, -170.212] - loss: 2677.489 - mae: 283.677 - mean_q: 217.511 Interval 4749 (2374000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1071 4 episodes - episode_reward: -390.460 [-506.549, -323.404] - loss: 2595.848 - mae: 287.585 - mean_q: 222.215 Interval 4750 (2374500 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -9.4097 8 episodes - episode_reward: -590.954 [-1120.993, -260.479] - loss: 2616.720 - mae: 285.397 - mean_q: 220.343 Interval 4751 (2375000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -8.3213 5 episodes - episode_reward: -785.571 [-1043.828, -470.802] - loss: 2747.704 - mae: 277.623 - mean_q: 208.917 Interval 4752 (2375500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -9.3253 10 episodes - episode_reward: -484.570 [-962.403, -130.110] - loss: 2517.378 - mae: 269.344 - mean_q: 197.380 Interval 4753 (2376000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7187 6 episodes - episode_reward: -472.904 [-757.405, -186.803] - loss: 2129.195 - mae: 263.796 - mean_q: 189.106 Interval 4754 (2376500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.5741 5 episodes - episode_reward: -468.735 [-578.559, -358.333] - loss: 2268.696 - mae: 265.150 - mean_q: 190.997 Interval 4755 (2377000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.7473 4 episodes - episode_reward: -589.288 [-819.539, -303.436] - loss: 2215.350 - mae: 263.295 - mean_q: 187.217 Interval 4756 (2377500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2665 2 episodes - episode_reward: -813.272 [-1003.301, -623.242] - loss: 2096.537 - mae: 269.023 - mean_q: 193.947 Interval 4757 (2378000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.2750 2 episodes - episode_reward: -806.979 [-922.795, -691.163] - loss: 2580.081 - mae: 278.227 - mean_q: 207.186 Interval 4758 (2378500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.9730 7 episodes - episode_reward: -412.773 [-808.364, -99.513] - loss: 2499.686 - mae: 280.920 - mean_q: 212.948 Interval 4759 
(2379000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -8.3708 8 episodes - episode_reward: -509.244 [-811.552, -298.694] - loss: 2255.340 - mae: 262.404 - mean_q: 185.379 Interval 4760 (2379500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -9.9366 9 episodes - episode_reward: -546.732 [-1150.975, -100.000] - loss: 2112.169 - mae: 269.418 - mean_q: 196.067 Interval 4761 (2380000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.1701 4 episodes - episode_reward: -640.489 [-851.920, -274.080] - loss: 2117.744 - mae: 268.908 - mean_q: 194.031 Interval 4762 (2380500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -8.3599 7 episodes - episode_reward: -641.406 [-982.081, -194.179] - loss: 1801.862 - mae: 265.492 - mean_q: 188.439 Interval 4763 (2381000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.7949 6 episodes - episode_reward: -597.999 [-1163.430, -100.000] - loss: 1952.495 - mae: 263.210 - mean_q: 184.573 Interval 4764 (2381500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.2769 6 episodes - episode_reward: -572.372 [-957.230, -157.214] - loss: 1969.736 - mae: 263.190 - mean_q: 184.961 Interval 4765 (2382000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9433 3 episodes - episode_reward: -334.710 [-463.105, -258.395] - loss: 1973.207 - mae: 258.118 - mean_q: 176.779 Interval 4766 (2382500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3245 6 episodes - episode_reward: -194.338 [-334.192, -118.962] - loss: 1798.629 - mae: 259.948 - mean_q: 179.104 Interval 4767 (2383000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7336 5 episodes - episode_reward: -177.184 [-203.584, -134.226] - loss: 1873.350 - mae: 255.831 - mean_q: 173.119 Interval 4768 
(2383500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5595 7 episodes - episode_reward: -163.282 [-220.667, -92.696] - loss: 1676.100 - mae: 251.416 - mean_q: 166.685 Interval 4769 (2384000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4729 6 episodes - episode_reward: -212.799 [-261.814, -174.214] - loss: 1891.660 - mae: 255.825 - mean_q: 173.456 Interval 4770 (2384500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0272 5 episodes - episode_reward: -216.865 [-273.623, -180.553] - loss: 1850.435 - mae: 251.718 - mean_q: 167.446 Interval 4771 (2385000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.8777 3 episodes - episode_reward: -160.063 [-192.298, -139.448] - loss: 1655.125 - mae: 259.513 - mean_q: 178.769 Interval 4772 (2385500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5715 2 episodes - episode_reward: -140.645 [-150.311, -130.978] - loss: 1731.080 - mae: 267.064 - mean_q: 189.729 Interval 4773 (2386000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2330 4 episodes - episode_reward: -147.231 [-181.945, -89.505] - loss: 1858.266 - mae: 283.259 - mean_q: 213.130 Interval 4774 (2386500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6416 8 episodes - episode_reward: -159.670 [-222.320, -100.000] - loss: 1750.125 - mae: 288.756 - mean_q: 220.787 Interval 4775 (2387000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2873 6 episodes - episode_reward: -185.196 [-292.204, -97.452] - loss: 1654.976 - mae: 293.771 - mean_q: 227.348 Interval 4776 (2387500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1350 7 episodes - episode_reward: -166.218 [-213.600, -130.445] - loss: 1648.256 - mae: 289.316 - mean_q: 221.765 Interval 4777 
(2388000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1895 4 episodes - episode_reward: -133.636 [-162.278, -106.398] - loss: 1498.212 - mae: 281.932 - mean_q: 212.176 Interval 4778 (2388500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8060 4 episodes - episode_reward: -228.473 [-273.879, -149.046] - loss: 1606.704 - mae: 286.040 - mean_q: 216.377 Interval 4779 (2389000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9039 3 episodes - episode_reward: -141.146 [-160.710, -118.142] - loss: 1631.350 - mae: 292.162 - mean_q: 226.468 Interval 4780 (2389500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2084 7 episodes - episode_reward: -166.048 [-204.769, -133.834] - loss: 1668.223 - mae: 290.301 - mean_q: 224.995 Interval 4781 (2390000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7981 5 episodes - episode_reward: -166.770 [-244.249, -24.984] - loss: 1734.630 - mae: 292.136 - mean_q: 226.917 Interval 4782 (2390500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7313 8 episodes - episode_reward: -180.051 [-271.599, -118.327] - loss: 1662.470 - mae: 291.443 - mean_q: 227.106 Interval 4783 (2391000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2226 4 episodes - episode_reward: -135.405 [-168.886, -81.194] - loss: 1647.788 - mae: 286.334 - mean_q: 220.751 Interval 4784 (2391500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7514 6 episodes - episode_reward: -240.846 [-354.726, -176.772] - loss: 1719.766 - mae: 292.821 - mean_q: 230.742 Interval 4785 (2392000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9472 8 episodes - episode_reward: -181.746 [-320.802, -13.927] - loss: 1722.941 - mae: 281.997 - mean_q: 214.925 Interval 4786 
(2392500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4153 6 episodes - episode_reward: -195.498 [-322.479, -137.839] - loss: 1755.665 - mae: 284.188 - mean_q: 219.318 Interval 4787 (2393000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8428 4 episodes - episode_reward: -226.157 [-283.412, -201.668] - loss: 1844.041 - mae: 280.957 - mean_q: 215.589 Interval 4788 (2393500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2497 7 episodes - episode_reward: -155.254 [-225.542, -100.000] - loss: 1829.818 - mae: 284.214 - mean_q: 219.648 Interval 4789 (2394000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3299 7 episodes - episode_reward: -174.270 [-209.562, -115.786] - loss: 1849.867 - mae: 284.487 - mean_q: 221.850 Interval 4790 (2394500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1910 3 episodes - episode_reward: -204.534 [-245.920, -162.379] - loss: 1930.658 - mae: 291.548 - mean_q: 232.951 Interval 4791 (2395000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6096 5 episodes - episode_reward: -157.521 [-205.706, -68.157] - loss: 1817.687 - mae: 286.415 - mean_q: 227.268 Interval 4792 (2395500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.8662 3 episodes - episode_reward: -114.485 [-195.046, -7.452] - loss: 2112.266 - mae: 292.175 - mean_q: 235.262 Interval 4793 (2396000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9652 7 episodes - episode_reward: -154.359 [-248.771, -25.946] - loss: 1765.084 - mae: 305.173 - mean_q: 256.093 Interval 4794 (2396500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5851 5 episodes - episode_reward: -164.595 [-224.609, -124.035] - loss: 2009.243 - mae: 302.366 - mean_q: 253.180 Interval 4795 
(2397000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2058 4 episodes - episode_reward: -148.424 [-176.030, -117.990] - loss: 2081.883 - mae: 306.482 - mean_q: 261.331 Interval 4796 (2397500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0494 4 episodes - episode_reward: -111.880 [-225.142, 28.363] - loss: 1908.459 - mae: 315.001 - mean_q: 276.026 Interval 4797 (2398000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0225 5 episodes - episode_reward: -225.791 [-269.088, -152.272] - loss: 2125.707 - mae: 318.940 - mean_q: 281.746 Interval 4798 (2398500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5706 7 episodes - episode_reward: -177.493 [-218.485, -142.387] - loss: 1958.560 - mae: 317.971 - mean_q: 281.043 Interval 4799 (2399000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1851 6 episodes - episode_reward: -170.411 [-313.605, -9.182] - loss: 2139.192 - mae: 323.592 - mean_q: 289.940 Interval 4800 (2399500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5133 7 episodes - episode_reward: -190.928 [-236.151, -106.993] - loss: 2172.254 - mae: 330.260 - mean_q: 299.057 Interval 4801 (2400000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4700 4 episodes - episode_reward: -180.098 [-216.738, -133.244] - loss: 2308.670 - mae: 331.637 - mean_q: 302.806 Interval 4802 (2400500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0412 3 episodes - episode_reward: -177.845 [-262.401, -100.178] - loss: 2056.077 - mae: 335.626 - mean_q: 309.276 Interval 4803 (2401000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1080 2 episodes - episode_reward: -259.662 [-268.344, -250.980] - loss: 2068.140 - mae: 336.349 - mean_q: 310.659 Interval 4804 
(2401500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0999 6 episodes - episode_reward: -171.832 [-233.428, -115.882] - loss: 2036.624 - mae: 339.987 - mean_q: 316.792 Interval 4805 (2402000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4086 3 episodes - episode_reward: -224.957 [-305.457, -178.308] - loss: 1987.283 - mae: 339.511 - mean_q: 315.350 Interval 4806 (2402500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8637 5 episodes - episode_reward: -203.946 [-252.039, -132.692] - loss: 1792.916 - mae: 338.819 - mean_q: 314.513 Interval 4807 (2403000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.0750 2 episodes - episode_reward: -176.393 [-195.993, -156.792] - loss: 1830.312 - mae: 339.042 - mean_q: 316.552 Interval 4808 (2403500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6044 3 episodes - episode_reward: -288.233 [-376.997, -190.095] - loss: 1846.918 - mae: 335.553 - mean_q: 311.124 Interval 4809 (2404000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4661 5 episodes - episode_reward: -169.694 [-215.447, -126.475] - loss: 1729.151 - mae: 332.771 - mean_q: 307.589 Interval 4810 (2404500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8500 7 episodes - episode_reward: -204.977 [-336.623, -136.338] - loss: 1747.292 - mae: 345.353 - mean_q: 325.345 Interval 4811 (2405000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5303 4 episodes - episode_reward: -185.607 [-303.369, -83.555] - loss: 1729.125 - mae: 333.284 - mean_q: 308.272 Interval 4812 (2405500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4579 3 episodes - episode_reward: -242.320 [-382.557, -125.688] - loss: 1760.190 - mae: 337.212 - mean_q: 314.613 Interval 4813 
(2406000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0570 5 episodes - episode_reward: -203.448 [-264.077, -114.183] - loss: 1707.976 - mae: 335.663 - mean_q: 313.333 Interval 4814 (2406500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8994 6 episodes - episode_reward: -162.707 [-212.477, -112.952] - loss: 1577.133 - mae: 332.475 - mean_q: 308.730 Interval 4815 (2407000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0005 4 episodes - episode_reward: -240.152 [-257.061, -198.188] - loss: 1776.980 - mae: 331.264 - mean_q: 306.469 Interval 4816 (2407500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3803 3 episodes - episode_reward: -238.510 [-250.817, -222.273] - loss: 1673.076 - mae: 335.944 - mean_q: 314.432 Interval 4817 (2408000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5288 3 episodes - episode_reward: -248.115 [-302.224, -205.218] - loss: 1552.030 - mae: 340.630 - mean_q: 321.807 Interval 4818 (2408500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0686 5 episodes - episode_reward: -186.350 [-293.222, -129.072] - loss: 1573.597 - mae: 339.673 - mean_q: 320.737 Interval 4819 (2409000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6297 7 episodes - episode_reward: -205.787 [-315.436, -157.646] - loss: 1707.468 - mae: 336.303 - mean_q: 314.946 Interval 4820 (2409500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1649 4 episodes - episode_reward: -221.041 [-264.982, -196.227] - loss: 1704.813 - mae: 331.936 - mean_q: 308.573 Interval 4821 (2410000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5103 10 episodes - episode_reward: -191.451 [-321.456, -100.000] - loss: 1541.060 - mae: 334.958 - mean_q: 313.034 Interval 4822 
(2410500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5215 9 episodes - episode_reward: -201.132 [-247.784, -149.344] - loss: 1504.284 - mae: 326.508 - mean_q: 299.827 Interval 4823 (2411000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2000 5 episodes - episode_reward: -214.323 [-293.940, -150.950] - loss: 1519.919 - mae: 323.443 - mean_q: 293.579 Interval 4824 (2411500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2716 6 episodes - episode_reward: -189.979 [-239.610, -100.000] - loss: 1515.514 - mae: 319.293 - mean_q: 287.908 Interval 4825 (2412000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3408 4 episodes - episode_reward: -175.562 [-236.324, -119.561] - loss: 1506.074 - mae: 321.621 - mean_q: 290.499 Interval 4826 (2412500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8760 5 episodes - episode_reward: -185.892 [-221.890, -150.964] - loss: 1530.484 - mae: 315.709 - mean_q: 281.362 Interval 4827 (2413000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5039 8 episodes - episode_reward: -143.248 [-228.347, 25.954] - loss: 1591.520 - mae: 315.947 - mean_q: 282.058 Interval 4828 (2413500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8717 8 episodes - episode_reward: -186.967 [-281.542, -124.546] - loss: 1410.731 - mae: 308.847 - mean_q: 272.629 Interval 4829 (2414000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3018 4 episodes - episode_reward: -154.906 [-246.940, -27.761] - loss: 1389.582 - mae: 315.181 - mean_q: 281.522 Interval 4830 (2414500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5637 7 episodes - episode_reward: -186.372 [-243.827, -135.006] - loss: 1428.783 - mae: 312.383 - mean_q: 277.620 Interval 4831 
(2415000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6154 11 episodes - episode_reward: -169.767 [-294.716, -88.995] - loss: 1379.270 - mae: 304.884 - mean_q: 266.754 Interval 4832 (2415500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0021 6 episodes - episode_reward: -161.856 [-238.060, -55.936] - loss: 1354.871 - mae: 300.827 - mean_q: 259.193 Interval 4833 (2416000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5135 8 episodes - episode_reward: -163.731 [-223.781, -32.158] - loss: 1322.721 - mae: 295.979 - mean_q: 253.344 Interval 4834 (2416500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2128 6 episodes - episode_reward: -173.911 [-208.014, -120.051] - loss: 1421.341 - mae: 287.794 - mean_q: 239.587 Interval 4835 (2417000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8500 9 episodes - episode_reward: -162.913 [-280.009, -68.374] - loss: 1298.130 - mae: 287.476 - mean_q: 239.786 Interval 4836 (2417500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3008 7 episodes - episode_reward: -165.949 [-291.420, -1.668] - loss: 1303.776 - mae: 278.602 - mean_q: 225.454 Interval 4837 (2418000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5412 5 episodes - episode_reward: -225.547 [-393.098, -130.733] - loss: 1178.673 - mae: 272.643 - mean_q: 218.239 Interval 4838 (2418500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2270 7 episodes - episode_reward: -172.757 [-247.014, -9.237] - loss: 1150.911 - mae: 279.873 - mean_q: 229.036 Interval 4839 (2419000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4242 4 episodes - episode_reward: -191.060 [-257.634, -150.768] - loss: 1220.579 - mae: 266.644 - mean_q: 210.240 Interval 4840 (2419500 
steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4069 7 episodes - episode_reward: -163.235 [-245.576, -100.000] - loss: 1256.410 - mae: 263.085 - mean_q: 205.564 Interval 4841 (2420000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2072 6 episodes - episode_reward: -183.182 [-257.292, -127.709] - loss: 1240.270 - mae: 257.297 - mean_q: 196.431 Interval 4842 (2420500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9381 8 episodes - episode_reward: -189.624 [-274.079, -122.595] - loss: 1200.022 - mae: 248.285 - mean_q: 182.539 Interval 4843 (2421000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.1704 2 episodes - episode_reward: -237.905 [-265.220, -210.590] - loss: 1126.678 - mae: 247.696 - mean_q: 182.800 Interval 4844 (2421500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3853 7 episodes - episode_reward: -186.109 [-267.092, -100.000] - loss: 1203.412 - mae: 242.608 - mean_q: 175.927 Interval 4845 (2422000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0857 5 episodes - episode_reward: -209.516 [-331.732, -130.295] - loss: 1149.128 - mae: 236.374 - mean_q: 166.303 Interval 4846 (2422500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5656 6 episodes - episode_reward: -207.211 [-303.437, -152.480] - loss: 1010.189 - mae: 231.300 - mean_q: 158.880 Interval 4847 (2423000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2657 9 episodes - episode_reward: -177.882 [-273.708, -121.755] - loss: 1069.390 - mae: 225.972 - mean_q: 149.386 Interval 4848 (2423500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0005 5 episodes - episode_reward: -202.903 [-255.897, -94.114] - loss: 1105.276 - mae: 213.318 - mean_q: 131.238 Interval 4849 (2424000 
steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6386 8 episodes - episode_reward: -168.946 [-215.187, -106.236] - loss: 1075.842 - mae: 208.796 - mean_q: 124.998 Interval 4850 (2424500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7310 8 episodes - episode_reward: -169.653 [-267.657, -81.154] - loss: 1046.722 - mae: 206.148 - mean_q: 120.814 Interval 4851 (2425000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5685 4 episodes - episode_reward: -204.616 [-342.335, -146.670] - loss: 962.980 - mae: 202.299 - mean_q: 115.605 Interval 4852 (2425500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9201 4 episodes - episode_reward: -422.769 [-726.111, -179.774] - loss: 1003.000 - mae: 194.032 - mean_q: 103.556 Interval 4853 (2426000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.0286 7 episodes - episode_reward: -463.559 [-715.013, -329.590] - loss: 897.938 - mae: 194.095 - mean_q: 103.984 Interval 4854 (2426500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.7016 7 episodes - episode_reward: -419.778 [-640.620, -257.350] - loss: 1003.357 - mae: 188.313 - mean_q: 94.723 Interval 4855 (2427000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.6163 8 episodes - episode_reward: -569.794 [-1095.107, -384.401] - loss: 861.604 - mae: 181.034 - mean_q: 82.940 Interval 4856 (2427500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -9.5802 7 episodes - episode_reward: -681.685 [-1088.206, -316.823] - loss: 890.865 - mae: 180.838 - mean_q: 82.682 Interval 4857 (2428000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.5222 9 episodes - episode_reward: -369.958 [-577.833, -98.944] - loss: 855.823 - mae: 177.720 - mean_q: 76.423 Interval 4858 (2428500 steps 
performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1306 4 episodes - episode_reward: -494.691 [-584.792, -390.870] - loss: 894.624 - mae: 176.722 - mean_q: 75.379 Interval 4859 (2429000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -7.6919 6 episodes - episode_reward: -656.129 [-1158.581, -301.253] - loss: 862.284 - mae: 170.377 - mean_q: 66.422 Interval 4860 (2429500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.8672 8 episodes - episode_reward: -509.106 [-783.557, -230.250] - loss: 822.272 - mae: 164.322 - mean_q: 56.244 Interval 4861 (2430000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.3984 9 episodes - episode_reward: -367.912 [-521.450, -105.908] - loss: 782.514 - mae: 169.018 - mean_q: 63.688 Interval 4862 (2430500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.6381 11 episodes - episode_reward: -367.008 [-551.442, -164.450] - loss: 790.165 - mae: 165.423 - mean_q: 58.481 Interval 4863 (2431000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -9.2911 7 episodes - episode_reward: -680.201 [-1217.962, -175.025] - loss: 737.303 - mae: 164.453 - mean_q: 56.733 Interval 4864 (2431500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7267 6 episodes - episode_reward: -391.835 [-504.582, -144.032] - loss: 793.600 - mae: 160.372 - mean_q: 49.976 Interval 4865 (2432000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -9.3590 6 episodes - episode_reward: -793.025 [-1207.170, -218.833] - loss: 721.876 - mae: 156.787 - mean_q: 44.915 Interval 4866 (2432500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9969 5 episodes - episode_reward: -400.323 [-1040.243, -136.675] - loss: 739.788 - mae: 157.048 - mean_q: 46.265 Interval 4867 (2433000 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -3.2714 9 episodes - episode_reward: -174.357 [-237.418, -100.000] - loss: 684.477 - mae: 151.214 - mean_q: 36.853 Interval 4868 (2433500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3622 8 episodes - episode_reward: -206.044 [-317.419, -118.824] - loss: 661.120 - mae: 153.416 - mean_q: 40.595 Interval 4869 (2434000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3646 6 episodes - episode_reward: -202.920 [-251.870, -124.622] - loss: 647.749 - mae: 151.955 - mean_q: 38.517 Interval 4870 (2434500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5309 8 episodes - episode_reward: -164.551 [-289.884, -29.161] - loss: 597.868 - mae: 145.715 - mean_q: 27.988 Interval 4871 (2435000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1692 6 episodes - episode_reward: -146.972 [-231.705, -20.930] - loss: 567.797 - mae: 142.820 - mean_q: 23.059 Interval 4872 (2435500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1008 6 episodes - episode_reward: -214.609 [-487.368, -13.652] - loss: 583.016 - mae: 141.923 - mean_q: 22.123 Interval 4873 (2436000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.5086 4 episodes - episode_reward: -672.923 [-954.124, -528.914] - loss: 529.943 - mae: 138.142 - mean_q: 16.005 Interval 4874 (2436500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.1428 5 episodes - episode_reward: -565.656 [-870.336, -327.519] - loss: 551.902 - mae: 140.846 - mean_q: 21.104 Interval 4875 (2437000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.3755 9 episodes - episode_reward: -365.863 [-542.569, -100.000] - loss: 538.354 - mae: 143.564 - mean_q: 26.105 Interval 4876 (2437500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -6.2468 6 episodes - episode_reward: -548.259 [-938.718, -157.641] - loss: 514.201 - mae: 137.940 - mean_q: 17.544 Interval 4877 (2438000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -9.9959 8 episodes - episode_reward: -519.783 [-1095.513, -100.000] - loss: 554.161 - mae: 138.899 - mean_q: 19.170 Interval 4878 (2438500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3251 5 episodes - episode_reward: -675.785 [-960.930, -330.935] - loss: 476.501 - mae: 140.629 - mean_q: 21.658 Interval 4879 (2439000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -9.0260 7 episodes - episode_reward: -621.844 [-1339.179, -352.484] - loss: 507.582 - mae: 141.520 - mean_q: 23.651 Interval 4880 (2439500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.8484 5 episodes - episode_reward: -628.896 [-892.598, -437.717] - loss: 497.144 - mae: 136.882 - mean_q: 15.390 Interval 4881 (2440000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.3013 6 episodes - episode_reward: -513.955 [-935.286, -324.774] - loss: 534.185 - mae: 135.131 - mean_q: 13.033 Interval 4882 (2440500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.6421 10 episodes - episode_reward: -371.043 [-762.755, -128.754] - loss: 427.846 - mae: 133.064 - mean_q: 9.447 Interval 4883 (2441000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.1549 8 episodes - episode_reward: -539.432 [-954.401, -107.737] - loss: 440.181 - mae: 130.487 - mean_q: 5.357 Interval 4884 (2441500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7851 4 episodes - episode_reward: -328.981 [-446.002, -153.090] - loss: 402.398 - mae: 129.695 - mean_q: 4.321 Interval 4885 (2442000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -10.2430 7 episodes - episode_reward: -617.134 [-1162.659, -100.000] - loss: 453.901 - mae: 130.642 - mean_q: 6.900 Interval 4886 (2442500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.5925 7 episodes - episode_reward: -584.136 [-1125.824, -303.227] - loss: 376.715 - mae: 128.352 - mean_q: 3.895 Interval 4887 (2443000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.3611 5 episodes - episode_reward: -794.285 [-1123.952, -373.360] - loss: 418.685 - mae: 130.630 - mean_q: 6.778 Interval 4888 (2443500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.2365 6 episodes - episode_reward: -735.010 [-1221.424, -100.000] - loss: 450.778 - mae: 129.763 - mean_q: 5.540 Interval 4889 (2444000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7564 5 episodes - episode_reward: -566.407 [-1103.668, -148.023] - loss: 425.774 - mae: 129.691 - mean_q: 6.427 Interval 4890 (2444500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0935 5 episodes - episode_reward: -412.167 [-1070.588, -155.338] - loss: 378.748 - mae: 131.719 - mean_q: 8.758 Interval 4891 (2445000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4419 6 episodes - episode_reward: -208.189 [-308.026, -131.948] - loss: 407.169 - mae: 127.523 - mean_q: 2.100 Interval 4892 (2445500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3132 7 episodes - episode_reward: -162.114 [-230.324, -13.108] - loss: 434.735 - mae: 125.252 - mean_q: -0.632 Interval 4893 (2446000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4785 6 episodes - episode_reward: -204.415 [-288.055, -112.267] - loss: 388.260 - mae: 123.307 - mean_q: -4.584 Interval 4894 (2446500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.6135 6 episodes - episode_reward: -219.453 [-313.228, -111.170] - loss: 357.706 - mae: 122.743 - mean_q: -5.642 Interval 4895 (2447000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.7472 6 episodes - episode_reward: -147.855 [-205.674, -24.700] - loss: 339.457 - mae: 122.052 - mean_q: -7.008 Interval 4896 (2447500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0044 7 episodes - episode_reward: -209.146 [-299.474, -134.930] - loss: 351.178 - mae: 119.303 - mean_q: -11.788 Interval 4897 (2448000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4085 8 episodes - episode_reward: -150.203 [-213.579, -69.727] - loss: 313.910 - mae: 118.847 - mean_q: -12.979 Interval 4898 (2448500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5467 7 episodes - episode_reward: -186.208 [-273.852, -100.728] - loss: 290.166 - mae: 117.723 - mean_q: -14.660 Interval 4899 (2449000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8742 6 episodes - episode_reward: -145.656 [-181.110, -106.503] - loss: 296.250 - mae: 114.945 - mean_q: -18.501 Interval 4900 (2449500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6906 6 episodes - episode_reward: -223.629 [-265.544, -139.453] - loss: 293.081 - mae: 115.739 - mean_q: -18.627 Interval 4901 (2450000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7977 9 episodes - episode_reward: -166.374 [-208.180, -100.000] - loss: 271.657 - mae: 114.751 - mean_q: -21.737 Interval 4902 (2450500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8394 6 episodes - episode_reward: -229.357 [-337.339, -142.418] - loss: 279.149 - mae: 115.009 - mean_q: -20.272 Interval 4903 (2451000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.5341 7 episodes - episode_reward: -186.094 [-226.575, -158.946] - loss: 279.179 - mae: 115.778 - mean_q: -19.035 Interval 4904 (2451500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7731 8 episodes - episode_reward: -168.465 [-258.286, -40.072] - loss: 256.687 - mae: 113.245 - mean_q: -24.927 Interval 4905 (2452000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9269 5 episodes - episode_reward: -196.647 [-218.098, -159.502] - loss: 231.991 - mae: 113.033 - mean_q: -24.942 Interval 4906 (2452500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0907 8 episodes - episode_reward: -195.479 [-288.425, -128.584] - loss: 217.252 - mae: 112.058 - mean_q: -27.921 Interval 4907 (2453000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5881 7 episodes - episode_reward: -186.213 [-275.013, -117.793] - loss: 235.216 - mae: 111.040 - mean_q: -29.619 Interval 4908 (2453500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2626 5 episodes - episode_reward: -198.783 [-272.287, -141.783] - loss: 253.286 - mae: 110.290 - mean_q: -31.363 Interval 4909 (2454000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2895 6 episodes - episode_reward: -202.348 [-354.927, -149.255] - loss: 230.572 - mae: 109.974 - mean_q: -33.043 Interval 4910 (2454500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8191 5 episodes - episode_reward: -192.890 [-267.962, -136.714] - loss: 228.398 - mae: 109.844 - mean_q: -33.464 Interval 4911 (2455000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7050 4 episodes - episode_reward: -308.613 [-593.793, -183.114] - loss: 220.973 - mae: 111.222 - mean_q: -31.543 Interval 4912 (2455500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3723 6 episodes - episode_reward: -204.908 [-301.741, -117.279] - loss: 240.223 - mae: 109.907 - mean_q: -35.494 Interval 4913 (2456000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8230 7 episodes - episode_reward: -353.358 [-587.680, -127.849] - loss: 201.377 - mae: 109.771 - mean_q: -35.428 Interval 4914 (2456500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6108 3 episodes - episode_reward: -775.125 [-1083.907, -594.954] - loss: 213.049 - mae: 108.531 - mean_q: -37.600 Interval 4915 (2457000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3504 2 episodes - episode_reward: -599.709 [-658.104, -541.314] - loss: 253.259 - mae: 109.629 - mean_q: -35.572 Interval 4916 (2457500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3128 4 episodes - episode_reward: -629.991 [-843.338, -527.656] - loss: 235.054 - mae: 106.899 - mean_q: -41.244 Interval 4917 (2458000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6627 4 episodes - episode_reward: -313.837 [-630.618, -151.587] - loss: 232.605 - mae: 106.236 - mean_q: -42.471 Interval 4918 (2458500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.8398 5 episodes - episode_reward: -581.876 [-939.928, -315.598] - loss: 241.811 - mae: 104.204 - mean_q: -45.393 Interval 4919 (2459000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.5332 9 episodes - episode_reward: -324.457 [-573.518, -133.376] - loss: 204.071 - mae: 105.208 - mean_q: -43.742 Interval 4920 (2459500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4874 2 episodes - episode_reward: -892.739 [-1487.788, -297.690] - loss: 216.959 - mae: 106.020 - mean_q: -41.977 Interval 4921 (2460000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -4.6037 5 episodes - episode_reward: -412.719 [-432.445, -388.057] - loss: 198.945 - mae: 105.675 - mean_q: -43.369 Interval 4922 (2460500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7316 1 episodes - episode_reward: -1616.309 [-1616.309, -1616.309] - loss: 219.023 - mae: 104.446 - mean_q: -45.720 Interval 4923 (2461000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1402 Interval 4924 (2461500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2017 Interval 4925 (2462000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9899 2 episodes - episode_reward: -737.016 [-1139.319, -334.713] - loss: 194.069 - mae: 101.811 - mean_q: -51.694 Interval 4926 (2462500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4810 6 episodes - episode_reward: -376.299 [-767.720, -222.305] - loss: 216.444 - mae: 100.802 - mean_q: -54.133 Interval 4927 (2463000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.1793 3 episodes - episode_reward: -873.092 [-1505.592, -100.000] - loss: 184.658 - mae: 100.015 - mean_q: -56.095 Interval 4928 (2463500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8014 5 episodes - episode_reward: -456.670 [-989.162, -105.455] - loss: 182.152 - mae: 99.259 - mean_q: -57.605 Interval 4929 (2464000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0357 3 episodes - episode_reward: -770.076 [-1341.353, -292.928] - loss: 173.632 - mae: 99.673 - mean_q: -56.930 Interval 4930 (2464500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0034 4 episodes - episode_reward: -488.131 [-852.687, -248.341] - loss: 176.224 - mae: 100.462 - mean_q: -55.845 Interval 4931 (2465000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -4.1444 3 episodes - episode_reward: -523.815 [-828.616, -341.216] - loss: 202.364 - mae: 97.629 - mean_q: -60.567 Interval 4932 (2465500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.5634 3 episodes - episode_reward: -1065.474 [-1527.329, -595.723] - loss: 168.015 - mae: 98.535 - mean_q: -59.578 Interval 4933 (2466000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3989 3 episodes - episode_reward: -545.619 [-779.277, -413.411] - loss: 167.977 - mae: 97.900 - mean_q: -59.749 Interval 4934 (2466500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6398 4 episodes - episode_reward: -387.909 [-609.179, -99.381] - loss: 165.502 - mae: 97.289 - mean_q: -61.481 Interval 4935 (2467000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8797 5 episodes - episode_reward: -267.241 [-588.789, -100.000] - loss: 189.399 - mae: 96.616 - mean_q: -63.197 Interval 4936 (2467500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5230 7 episodes - episode_reward: -245.235 [-654.362, -100.638] - loss: 168.275 - mae: 96.101 - mean_q: -64.692 Interval 4937 (2468000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0757 8 episodes - episode_reward: -199.072 [-258.588, -121.432] - loss: 158.911 - mae: 96.405 - mean_q: -63.610 Interval 4938 (2468500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1754 10 episodes - episode_reward: -151.965 [-197.884, -96.376] - loss: 163.600 - mae: 95.652 - mean_q: -65.614 Interval 4939 (2469000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8066 6 episodes - episode_reward: -156.683 [-235.167, -24.779] - loss: 137.822 - mae: 93.762 - mean_q: -69.365 Interval 4940 (2469500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1148 9 episodes - episode_reward: -171.851 [-217.131, -100.000] - loss: 147.350 - mae: 94.543 - mean_q: -67.433 Interval 4941 (2470000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1715 9 episodes - episode_reward: -181.099 [-267.748, -100.000] - loss: 155.276 - mae: 95.103 - mean_q: -67.244 Interval 4942 (2470500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8574 8 episodes - episode_reward: -173.590 [-234.510, -113.868] - loss: 149.781 - mae: 94.989 - mean_q: -67.010 Interval 4943 (2471000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5759 8 episodes - episode_reward: -171.561 [-234.198, -95.265] - loss: 153.368 - mae: 95.003 - mean_q: -67.711 Interval 4944 (2471500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6131 4 episodes - episode_reward: -191.133 [-235.063, -155.368] - loss: 159.683 - mae: 95.360 - mean_q: -66.743 Interval 4945 (2472000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6460 6 episodes - episode_reward: -227.564 [-438.690, -130.788] - loss: 147.886 - mae: 95.106 - mean_q: -66.427 Interval 4946 (2472500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8755 7 episodes - episode_reward: -197.863 [-335.933, -139.813] - loss: 136.951 - mae: 93.605 - mean_q: -70.574 Interval 4947 (2473000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0100 8 episodes - episode_reward: -187.713 [-326.718, -114.842] - loss: 145.004 - mae: 93.350 - mean_q: -70.411 Interval 4948 (2473500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0441 8 episodes - episode_reward: -186.665 [-271.441, -100.000] - loss: 123.181 - mae: 93.271 - mean_q: -71.517 Interval 4949 (2474000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6335 7 episodes - episode_reward: -191.189 [-354.532, -70.712] - loss: 152.307 - mae: 93.867 - mean_q: -69.711 Interval 4950 (2474500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7557 8 episodes - episode_reward: -175.772 [-261.862, -117.908] - loss: 122.138 - mae: 92.239 - mean_q: -73.749 Interval 4951 (2475000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1609 8 episodes - episode_reward: -200.364 [-276.728, -127.620] - loss: 132.551 - mae: 91.136 - mean_q: -74.647 Interval 4952 (2475500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4977 7 episodes - episode_reward: -178.989 [-244.585, -128.246] - loss: 109.787 - mae: 91.128 - mean_q: -75.116 Interval 4953 (2476000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8064 9 episodes - episode_reward: -154.722 [-275.177, -68.875] - loss: 106.852 - mae: 89.819 - mean_q: -77.110 Interval 4954 (2476500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9911 5 episodes - episode_reward: -198.963 [-271.299, -156.541] - loss: 112.410 - mae: 89.619 - mean_q: -76.283 Interval 4955 (2477000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2500 10 episodes - episode_reward: -157.744 [-270.579, -96.345] - loss: 130.999 - mae: 89.203 - mean_q: -76.921 Interval 4956 (2477500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8898 8 episodes - episode_reward: -183.460 [-241.195, -76.563] - loss: 98.717 - mae: 89.539 - mean_q: -76.135 Interval 4957 (2478000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7275 5 episodes - episode_reward: -249.123 [-346.702, -160.804] - loss: 101.606 - mae: 88.567 - mean_q: -78.753 Interval 4958 (2478500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.7895 9 episodes - episode_reward: -225.670 [-524.911, -131.505] - loss: 101.258 - mae: 88.793 - mean_q: -77.572 Interval 4959 (2479000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9835 8 episodes - episode_reward: -170.737 [-273.190, -113.997] - loss: 92.521 - mae: 87.526 - mean_q: -80.336 Interval 4960 (2479500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6162 10 episodes - episode_reward: -193.188 [-386.800, -98.727] - loss: 107.604 - mae: 88.052 - mean_q: -79.455 Interval 4961 (2480000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9288 8 episodes - episode_reward: -181.694 [-284.047, -42.289] - loss: 81.606 - mae: 87.691 - mean_q: -79.644 Interval 4962 (2480500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4219 7 episodes - episode_reward: -165.126 [-212.674, -100.000] - loss: 100.609 - mae: 87.935 - mean_q: -79.124 Interval 4963 (2481000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3646 9 episodes - episode_reward: -193.167 [-269.450, -164.748] - loss: 102.129 - mae: 87.180 - mean_q: -80.624 Interval 4964 (2481500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6997 7 episodes - episode_reward: -172.291 [-204.742, -135.407] - loss: 101.479 - mae: 88.361 - mean_q: -77.912 Interval 4965 (2482000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6027 8 episodes - episode_reward: -182.370 [-304.975, -124.717] - loss: 95.889 - mae: 88.746 - mean_q: -79.017 Interval 4966 (2482500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4101 7 episodes - episode_reward: -163.565 [-239.382, -41.162] - loss: 118.207 - mae: 89.214 - mean_q: -76.604 Interval 4967 (2483000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7036 7 episodes - episode_reward: -193.225 [-275.009, -136.267] - loss: 105.776 - mae: 89.782 - mean_q: -76.462 Interval 4968 (2483500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7118 11 episodes - episode_reward: -173.766 [-259.748, -100.000] - loss: 80.265 - mae: 89.448 - mean_q: -78.435 Interval 4969 (2484000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9617 9 episodes - episode_reward: -162.771 [-221.550, -102.443] - loss: 101.364 - mae: 89.769 - mean_q: -75.611 Interval 4970 (2484500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4719 8 episodes - episode_reward: -156.896 [-208.329, -116.270] - loss: 94.031 - mae: 88.939 - mean_q: -78.483 Interval 4971 (2485000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0454 7 episodes - episode_reward: -144.745 [-225.564, -25.417] - loss: 93.721 - mae: 88.777 - mean_q: -78.825 Interval 4972 (2485500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5077 4 episodes - episode_reward: -187.184 [-211.787, -169.056] - loss: 91.673 - mae: 89.929 - mean_q: -76.790 Interval 4973 (2486000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1076 7 episodes - episode_reward: -152.995 [-294.262, -74.717] - loss: 106.333 - mae: 89.167 - mean_q: -78.511 Interval 4974 (2486500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1895 8 episodes - episode_reward: -193.295 [-286.794, -132.853] - loss: 101.698 - mae: 88.861 - mean_q: -77.963 Interval 4975 (2487000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9088 6 episodes - episode_reward: -149.989 [-190.106, -14.767] - loss: 101.051 - mae: 88.714 - mean_q: -79.531 Interval 4976 (2487500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7950 8 episodes - episode_reward: -184.093 [-258.773, -16.299] - loss: 106.074 - mae: 88.456 - mean_q: -79.072 Interval 4977 (2488000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6705 8 episodes - episode_reward: -173.593 [-224.591, -78.834] - loss: 97.021 - mae: 87.883 - mean_q: -81.111 Interval 4978 (2488500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3128 8 episodes - episode_reward: -204.146 [-256.405, -119.603] - loss: 110.729 - mae: 87.056 - mean_q: -81.927 Interval 4979 (2489000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7379 6 episodes - episode_reward: -143.301 [-256.583, 57.401] - loss: 98.438 - mae: 86.806 - mean_q: -81.421 Interval 4980 (2489500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3300 6 episodes - episode_reward: -187.421 [-230.333, -161.196] - loss: 127.619 - mae: 86.576 - mean_q: -81.154 Interval 4981 (2490000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8875 8 episodes - episode_reward: -176.822 [-363.379, -120.325] - loss: 135.974 - mae: 85.693 - mean_q: -82.278 Interval 4982 (2490500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7854 7 episodes - episode_reward: -198.974 [-222.282, -164.053] - loss: 131.392 - mae: 85.802 - mean_q: -80.024 Interval 4983 (2491000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8270 9 episodes - episode_reward: -156.123 [-219.658, -81.760] - loss: 110.764 - mae: 85.878 - mean_q: -78.918 Interval 4984 (2491500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3398 8 episodes - episode_reward: -156.436 [-203.312, -4.329] - loss: 114.290 - mae: 86.903 - mean_q: -76.064 Interval 4985 (2492000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3751 5 episodes - episode_reward: -221.501 [-361.548, -172.594] - loss: 101.429 - mae: 87.408 - mean_q: -75.460 Interval 4986 (2492500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3911 4 episodes - episode_reward: -172.886 [-235.010, -122.307] - loss: 99.165 - mae: 89.016 - mean_q: -73.227 Interval 4987 (2493000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0436 6 episodes - episode_reward: -257.503 [-667.813, -123.707] - loss: 88.395 - mae: 90.696 - mean_q: -70.235 Interval 4988 (2493500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.1839 6 episodes - episode_reward: -646.957 [-1101.528, -299.632] - loss: 92.906 - mae: 91.505 - mean_q: -68.819 Interval 4989 (2494000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7006 4 episodes - episode_reward: -642.320 [-1028.655, -406.760] - loss: 104.125 - mae: 93.049 - mean_q: -66.963 Interval 4990 (2494500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.5124 4 episodes - episode_reward: -583.322 [-745.033, -274.358] - loss: 92.360 - mae: 95.592 - mean_q: -60.878 Interval 4991 (2495000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.1183 7 episodes - episode_reward: -577.652 [-1040.470, -281.687] - loss: 109.628 - mae: 94.920 - mean_q: -60.710 Interval 4992 (2495500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.4200 6 episodes - episode_reward: -586.146 [-1052.284, -421.116] - loss: 117.743 - mae: 96.932 - mean_q: -57.710 Interval 4993 (2496000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.4876 6 episodes - episode_reward: -430.440 [-807.288, -145.531] - loss: 115.565 - mae: 96.892 - mean_q: -59.335 Interval 4994 (2496500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -6.2751 6 episodes - episode_reward: -558.203 [-726.977, -289.365] - loss: 111.851 - mae: 97.629 - mean_q: -57.997 Interval 4995 (2497000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.7530 6 episodes - episode_reward: -497.265 [-672.448, -374.683] - loss: 117.878 - mae: 97.691 - mean_q: -57.173 Interval 4996 (2497500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.1251 5 episodes - episode_reward: -590.835 [-799.641, -374.696] - loss: 118.342 - mae: 98.644 - mean_q: -54.282 Interval 4997 (2498000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5314 8 episodes - episode_reward: -176.552 [-260.627, -11.388] - loss: 106.456 - mae: 99.184 - mean_q: -53.610 Interval 4998 (2498500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1882 4 episodes - episode_reward: -260.019 [-435.627, -146.401] - loss: 109.424 - mae: 99.868 - mean_q: -51.669 Interval 4999 (2499000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3759 6 episodes - episode_reward: -207.555 [-278.246, -147.195] - loss: 96.831 - mae: 101.307 - mean_q: -49.069 Interval 5000 (2499500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7218 7 episodes - episode_reward: -194.556 [-254.876, -97.461] - loss: 110.813 - mae: 102.409 - mean_q: -46.887 Interval 5001 (2500000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2665 7 episodes - episode_reward: -163.149 [-273.544, -100.000] - loss: 111.464 - mae: 103.749 - mean_q: -44.874 Interval 5002 (2500500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0494 6 episodes - episode_reward: -163.973 [-228.009, -112.278] - loss: 96.638 - mae: 103.232 - mean_q: -46.288 Interval 5003 (2501000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.1381 7 episodes - episode_reward: -150.600 [-220.472, 1.922] - loss: 104.880 - mae: 104.095 - mean_q: -44.676 Interval 5004 (2501500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9329 6 episodes - episode_reward: -159.901 [-179.539, -141.363] - loss: 108.301 - mae: 105.559 - mean_q: -43.208 Interval 5005 (2502000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4616 9 episodes - episode_reward: -195.089 [-244.530, -105.629] - loss: 101.613 - mae: 106.326 - mean_q: -42.223 Interval 5006 (2502500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8989 4 episodes - episode_reward: -210.262 [-255.259, -154.422] - loss: 99.287 - mae: 106.253 - mean_q: -42.811 Interval 5007 (2503000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9771 8 episodes - episode_reward: -203.320 [-299.524, -113.798] - loss: 109.763 - mae: 105.453 - mean_q: -44.116 Interval 5008 (2503500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9589 8 episodes - episode_reward: -183.219 [-271.160, -145.264] - loss: 106.078 - mae: 107.322 - mean_q: -41.324 Interval 5009 (2504000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6300 5 episodes - episode_reward: -164.236 [-295.915, 11.419] - loss: 106.270 - mae: 107.872 - mean_q: -41.368 Interval 5010 (2504500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0026 5 episodes - episode_reward: -187.810 [-351.981, -106.723] - loss: 117.246 - mae: 108.971 - mean_q: -40.250 Interval 5011 (2505000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6468 7 episodes - episode_reward: -185.936 [-247.450, -129.942] - loss: 99.558 - mae: 108.726 - mean_q: -40.787 Interval 5012 (2505500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9390 8 episodes - episode_reward: -188.419 [-278.143, -100.000] - loss: 110.869 - mae: 108.143 - mean_q: -42.663 Interval 5013 (2506000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4858 8 episodes - episode_reward: -153.898 [-194.712, -63.727] - loss: 122.392 - mae: 106.831 - mean_q: -44.250 Interval 5014 (2506500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4212 7 episodes - episode_reward: -177.728 [-245.239, -151.664] - loss: 110.231 - mae: 109.169 - mean_q: -41.548 Interval 5015 (2507000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5479 5 episodes - episode_reward: -221.555 [-312.323, -152.211] - loss: 109.836 - mae: 108.480 - mean_q: -40.738 Interval 5016 (2507500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8491 8 episodes - episode_reward: -200.027 [-378.292, -130.994] - loss: 112.327 - mae: 108.907 - mean_q: -41.801 Interval 5017 (2508000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3031 6 episodes - episode_reward: -181.478 [-217.695, -130.717] - loss: 110.750 - mae: 106.158 - mean_q: -47.399 Interval 5018 (2508500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6706 8 episodes - episode_reward: -175.882 [-236.764, -132.518] - loss: 122.712 - mae: 107.474 - mean_q: -43.653 Interval 5019 (2509000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6743 6 episodes - episode_reward: -208.150 [-249.716, -138.032] - loss: 114.286 - mae: 107.598 - mean_q: -44.609 Interval 5020 (2509500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.8032 11 episodes - episode_reward: -178.595 [-284.431, -100.000] - loss: 111.348 - mae: 109.880 - mean_q: -40.679 Interval 5021 (2510000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.5187 4 episodes - episode_reward: -173.996 [-196.222, -156.745] - loss: 132.222 - mae: 106.269 - mean_q: -47.720 Interval 5022 (2510500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8415 8 episodes - episode_reward: -180.935 [-246.326, -85.900] - loss: 148.127 - mae: 102.590 - mean_q: -54.071 Interval 5023 (2511000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0448 10 episodes - episode_reward: -201.879 [-292.336, -134.644] - loss: 119.772 - mae: 97.682 - mean_q: -63.216 Interval 5024 (2511500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3185 6 episodes - episode_reward: -202.392 [-236.962, -176.177] - loss: 106.174 - mae: 94.291 - mean_q: -69.122 Interval 5025 (2512000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5780 5 episodes - episode_reward: -260.138 [-433.216, -189.971] - loss: 108.629 - mae: 93.952 - mean_q: -70.880 Interval 5026 (2512500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3961 5 episodes - episode_reward: -139.959 [-176.911, -83.871] - loss: 117.158 - mae: 94.237 - mean_q: -69.793 Interval 5027 (2513000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2536 8 episodes - episode_reward: -198.900 [-275.888, -141.662] - loss: 121.099 - mae: 94.079 - mean_q: -70.510 Interval 5028 (2513500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7909 4 episodes - episode_reward: -226.240 [-301.010, -171.172] - loss: 128.831 - mae: 93.686 - mean_q: -72.040 Interval 5029 (2514000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2289 6 episodes - episode_reward: -183.711 [-251.407, -133.658] - loss: 103.411 - mae: 92.316 - mean_q: -73.135 Interval 5030 (2514500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9252 8 episodes - episode_reward: -188.900 [-288.611, -154.149] - loss: 108.093 - mae: 91.127 - mean_q: -75.330 Interval 5031 (2515000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9950 5 episodes - episode_reward: -191.899 [-229.006, -143.424] - loss: 103.311 - mae: 91.225 - mean_q: -75.259 Interval 5032 (2515500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2782 8 episodes - episode_reward: -204.072 [-261.592, -159.187] - loss: 97.790 - mae: 91.408 - mean_q: -75.555 Interval 5033 (2516000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7959 6 episodes - episode_reward: -232.423 [-365.516, -141.304] - loss: 91.677 - mae: 89.901 - mean_q: -78.317 Interval 5034 (2516500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8246 8 episodes - episode_reward: -172.429 [-236.168, -92.043] - loss: 70.492 - mae: 87.780 - mean_q: -83.457 Interval 5035 (2517000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8013 8 episodes - episode_reward: -178.322 [-248.302, -113.022] - loss: 78.973 - mae: 87.132 - mean_q: -84.606 Interval 5036 (2517500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6447 7 episodes - episode_reward: -188.352 [-236.520, -148.927] - loss: 76.216 - mae: 86.701 - mean_q: -85.371 Interval 5037 (2518000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3839 7 episodes - episode_reward: -171.102 [-248.865, -97.737] - loss: 63.735 - mae: 86.774 - mean_q: -85.438 Interval 5038 (2518500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6412 7 episodes - episode_reward: -193.262 [-290.061, -112.748] - loss: 71.449 - mae: 85.631 - mean_q: -86.648 Interval 5039 (2519000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.0941 6 episodes - episode_reward: -158.330 [-251.950, -80.904] - loss: 72.423 - mae: 86.151 - mean_q: -86.357 Interval 5040 (2519500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.1219 6 episodes - episode_reward: -498.638 [-1063.841, -174.653] - loss: 69.745 - mae: 85.761 - mean_q: -87.930 Interval 5041 (2520000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.5226 8 episodes - episode_reward: -555.506 [-967.343, -402.164] - loss: 71.154 - mae: 86.017 - mean_q: -87.093 Interval 5042 (2520500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.6553 6 episodes - episode_reward: -559.590 [-858.533, -378.287] - loss: 69.981 - mae: 85.793 - mean_q: -88.053 Interval 5043 (2521000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.9398 6 episodes - episode_reward: -489.520 [-723.280, -229.653] - loss: 78.213 - mae: 85.434 - mean_q: -89.158 Interval 5044 (2521500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.2215 7 episodes - episode_reward: -656.737 [-1136.837, -410.375] - loss: 64.019 - mae: 85.082 - mean_q: -91.041 Interval 5045 (2522000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3864 7 episodes - episode_reward: -310.572 [-525.855, -140.795] - loss: 79.188 - mae: 85.542 - mean_q: -90.367 Interval 5046 (2522500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5378 6 episodes - episode_reward: -200.817 [-233.152, -120.650] - loss: 62.680 - mae: 84.926 - mean_q: -92.099 Interval 5047 (2523000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1699 8 episodes - episode_reward: -205.287 [-289.166, -145.820] - loss: 59.350 - mae: 84.850 - mean_q: -93.726 Interval 5048 (2523500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6913 7 episodes - episode_reward: -193.528 [-258.941, -146.034] - loss: 48.123 - mae: 84.490 - mean_q: -94.139 Interval 5049 (2524000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8888 8 episodes - episode_reward: -183.843 [-314.755, -100.000] - loss: 71.201 - mae: 84.733 - mean_q: -94.209 Interval 5050 (2524500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6513 8 episodes - episode_reward: -165.377 [-257.692, -62.109] - loss: 49.535 - mae: 84.408 - mean_q: -95.127 Interval 5051 (2525000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8900 7 episodes - episode_reward: -203.363 [-294.795, -114.343] - loss: 51.801 - mae: 84.552 - mean_q: -94.192 Interval 5052 (2525500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1361 6 episodes - episode_reward: -170.306 [-229.730, -128.427] - loss: 59.779 - mae: 84.512 - mean_q: -94.589 Interval 5053 (2526000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9932 8 episodes - episode_reward: -189.915 [-400.363, -81.123] - loss: 51.775 - mae: 84.734 - mean_q: -94.742 Interval 5054 (2526500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9869 10 episodes - episode_reward: -195.658 [-319.240, -100.000] - loss: 45.933 - mae: 84.196 - mean_q: -97.585 Interval 5055 (2527000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8176 7 episodes - episode_reward: -355.387 [-677.721, -139.742] - loss: 49.084 - mae: 84.230 - mean_q: -97.322 Interval 5056 (2527500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9554 5 episodes - episode_reward: -484.630 [-1035.414, -206.893] - loss: 40.407 - mae: 83.849 - mean_q: -99.434 Interval 5057 (2528000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -6.2022 8 episodes - episode_reward: -282.271 [-504.133, -79.825] - loss: 41.726 - mae: 83.597 - mean_q: -100.531 Interval 5058 (2528500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5417 3 episodes - episode_reward: -1012.391 [-1560.617, -535.320] - loss: 35.732 - mae: 83.856 - mean_q: -100.248 Interval 5059 (2529000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9476 6 episodes - episode_reward: -367.575 [-772.089, -100.000] - loss: 35.093 - mae: 83.638 - mean_q: -102.138 Interval 5060 (2529500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6966 4 episodes - episode_reward: -428.848 [-970.419, -149.688] - loss: 38.432 - mae: 84.171 - mean_q: -100.442 Interval 5061 (2530000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9179 8 episodes - episode_reward: -179.076 [-270.840, -100.000] - loss: 39.677 - mae: 83.951 - mean_q: -101.131 Interval 5062 (2530500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8793 8 episodes - episode_reward: -183.839 [-283.523, -88.400] - loss: 39.025 - mae: 84.175 - mean_q: -101.677 Interval 5063 (2531000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6998 11 episodes - episode_reward: -167.188 [-270.903, -100.000] - loss: 42.031 - mae: 84.130 - mean_q: -101.386 Interval 5064 (2531500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0555 10 episodes - episode_reward: -153.896 [-264.280, -41.723] - loss: 39.362 - mae: 84.415 - mean_q: -100.664 Interval 5065 (2532000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1402 9 episodes - episode_reward: -174.231 [-240.238, -123.460] - loss: 40.049 - mae: 84.459 - mean_q: -101.563 Interval 5066 (2532500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.2605 7 episodes - episode_reward: -164.577 [-187.574, -139.031] - loss: 48.172 - mae: 84.962 - mean_q: -100.193 Interval 5067 (2533000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1701 7 episodes - episode_reward: -152.310 [-300.686, -0.773] - loss: 42.341 - mae: 84.774 - mean_q: -100.631 Interval 5068 (2533500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6600 6 episodes - episode_reward: -204.548 [-253.958, -138.367] - loss: 41.890 - mae: 85.318 - mean_q: -100.444 Interval 5069 (2534000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4641 8 episodes - episode_reward: -165.236 [-268.076, 39.371] - loss: 56.235 - mae: 86.267 - mean_q: -98.554 Interval 5070 (2534500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0796 7 episodes - episode_reward: -154.631 [-299.515, -19.249] - loss: 60.893 - mae: 86.274 - mean_q: -98.650 Interval 5071 (2535000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3843 7 episodes - episode_reward: -164.848 [-225.388, -88.738] - loss: 56.634 - mae: 86.462 - mean_q: -99.237 Interval 5072 (2535500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4279 9 episodes - episode_reward: -194.564 [-298.130, -131.266] - loss: 59.779 - mae: 86.598 - mean_q: -98.368 Interval 5073 (2536000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3481 6 episodes - episode_reward: -177.057 [-222.687, -135.279] - loss: 62.041 - mae: 86.905 - mean_q: -98.263 Interval 5074 (2536500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4991 7 episodes - episode_reward: -184.867 [-265.838, -123.927] - loss: 73.752 - mae: 87.076 - mean_q: -98.374 Interval 5075 (2537000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.9029 9 episodes - episode_reward: -160.177 [-216.792, -100.000] - loss: 83.250 - mae: 87.438 - mean_q: -97.616 Interval 5076 (2537500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6024 7 episodes - episode_reward: -184.509 [-244.818, -83.291] - loss: 79.169 - mae: 87.643 - mean_q: -96.794 Interval 5077 (2538000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7111 9 episodes - episode_reward: -159.700 [-350.269, -50.375] - loss: 77.520 - mae: 88.063 - mean_q: -97.833 Interval 5078 (2538500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1549 9 episodes - episode_reward: -175.449 [-258.223, -117.164] - loss: 96.201 - mae: 88.681 - mean_q: -96.134 Interval 5079 (2539000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9325 7 episodes - episode_reward: -142.443 [-257.885, 7.776] - loss: 99.414 - mae: 88.242 - mean_q: -96.726 Interval 5080 (2539500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3246 7 episodes - episode_reward: -164.189 [-221.710, -43.628] - loss: 101.925 - mae: 88.799 - mean_q: -95.132 Interval 5081 (2540000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9406 9 episodes - episode_reward: -158.680 [-209.453, -100.000] - loss: 81.942 - mae: 88.308 - mean_q: -96.057 Interval 5082 (2540500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2181 8 episodes - episode_reward: -192.182 [-232.289, -156.459] - loss: 89.533 - mae: 89.322 - mean_q: -94.191 Interval 5083 (2541000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2409 10 episodes - episode_reward: -167.308 [-236.239, -100.000] - loss: 104.858 - mae: 89.843 - mean_q: -92.981 Interval 5084 (2541500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1348 8 episodes - episode_reward: -193.015 [-234.572, -166.256] - loss: 114.531 - mae: 90.617 - mean_q: -90.416 Interval 5085 (2542000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2035 9 episodes - episode_reward: -176.586 [-213.190, -108.074] - loss: 102.312 - mae: 90.277 - mean_q: -92.877 Interval 5086 (2542500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4665 8 episodes - episode_reward: -161.117 [-239.524, -24.739] - loss: 120.556 - mae: 90.715 - mean_q: -92.024 Interval 5087 (2543000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2214 8 episodes - episode_reward: -200.125 [-301.228, -110.229] - loss: 106.822 - mae: 90.550 - mean_q: -93.190 Interval 5088 (2543500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8557 6 episodes - episode_reward: -232.701 [-459.491, -150.395] - loss: 97.049 - mae: 90.329 - mean_q: -94.062 Interval 5089 (2544000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0400 9 episodes - episode_reward: -173.464 [-275.870, -100.000] - loss: 103.002 - mae: 90.786 - mean_q: -94.002 Interval 5090 (2544500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8877 8 episodes - episode_reward: -183.303 [-290.170, -109.582] - loss: 98.993 - mae: 90.696 - mean_q: -94.340 Interval 5091 (2545000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8508 7 episodes - episode_reward: -202.994 [-265.050, -146.391] - loss: 104.960 - mae: 90.799 - mean_q: -93.618 Interval 5092 (2545500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4676 5 episodes - episode_reward: -247.099 [-366.774, -194.193] - loss: 85.105 - mae: 89.817 - mean_q: -96.496 Interval 5093 (2546000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9766 9 episodes - episode_reward: -161.788 [-213.687, -62.976] - loss: 90.106 - mae: 90.514 - mean_q: -95.267 Interval 5094 (2546500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5554 8 episodes - episode_reward: -163.236 [-231.181, -100.000] - loss: 101.422 - mae: 89.869 - mean_q: -95.794 Interval 5095 (2547000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.9582 2 episodes - episode_reward: -203.250 [-221.261, -185.239] - loss: 81.624 - mae: 89.120 - mean_q: -97.565 Interval 5096 (2547500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2437 6 episodes - episode_reward: -201.724 [-266.497, -171.226] - loss: 83.087 - mae: 90.166 - mean_q: -95.583 Interval 5097 (2548000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6806 5 episodes - episode_reward: -168.584 [-186.097, -132.861] - loss: 80.233 - mae: 88.653 - mean_q: -98.101 Interval 5098 (2548500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1618 10 episodes - episode_reward: -157.530 [-265.891, -52.902] - loss: 68.132 - mae: 88.521 - mean_q: -98.479 Interval 5099 (2549000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1194 5 episodes - episode_reward: -201.781 [-269.652, -99.263] - loss: 75.654 - mae: 87.975 - mean_q: -99.194 Interval 5100 (2549500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7601 8 episodes - episode_reward: -178.582 [-227.433, -100.000] - loss: 71.477 - mae: 88.575 - mean_q: -98.703 Interval 5101 (2550000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1788 7 episodes - episode_reward: -149.914 [-288.195, -100.000] - loss: 67.651 - mae: 88.361 - mean_q: -99.557 Interval 5102 (2550500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6367 8 episodes - episode_reward: -163.962 [-313.301, -13.125] - loss: 71.135 - mae: 87.817 - mean_q: -101.070 Interval 5103 (2551000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1194 6 episodes - episode_reward: -174.418 [-214.343, -124.160] - loss: 61.589 - mae: 87.243 - mean_q: -101.998 Interval 5104 (2551500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3656 6 episodes - episode_reward: -178.891 [-205.313, -139.085] - loss: 59.605 - mae: 87.457 - mean_q: -101.494 Interval 5105 (2552000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5638 10 episodes - episode_reward: -189.772 [-284.388, -122.720] - loss: 59.246 - mae: 86.906 - mean_q: -102.281 Interval 5106 (2552500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0200 8 episodes - episode_reward: -184.747 [-221.697, -109.140] - loss: 60.079 - mae: 86.788 - mean_q: -102.956 Interval 5107 (2553000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8451 9 episodes - episode_reward: -167.413 [-228.257, -100.000] - loss: 42.999 - mae: 86.610 - mean_q: -103.606 Interval 5108 (2553500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2736 7 episodes - episode_reward: -165.220 [-234.284, -111.862] - loss: 54.733 - mae: 86.733 - mean_q: -102.977 Interval 5109 (2554000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1209 9 episodes - episode_reward: -170.354 [-234.005, -100.000] - loss: 43.070 - mae: 86.560 - mean_q: -104.650 Interval 5110 (2554500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7667 8 episodes - episode_reward: -238.105 [-411.241, -136.182] - loss: 48.460 - mae: 86.212 - mean_q: -105.841 Interval 5111 (2555000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -8.6116 10 episodes - episode_reward: -431.134 [-1024.734, -100.000] - loss: 43.186 - mae: 86.041 - mean_q: -104.466 Interval 5112 (2555500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -8.0048 9 episodes - episode_reward: -428.100 [-589.082, -100.000] - loss: 49.019 - mae: 86.083 - mean_q: -104.409 Interval 5113 (2556000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -9.1577 8 episodes - episode_reward: -569.495 [-985.040, -198.424] - loss: 40.996 - mae: 86.211 - mean_q: -104.816 Interval 5114 (2556500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.4741 8 episodes - episode_reward: -419.830 [-913.690, -129.940] - loss: 44.456 - mae: 86.213 - mean_q: -104.611 Interval 5115 (2557000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.0379 8 episodes - episode_reward: -372.674 [-1088.683, -95.421] - loss: 44.406 - mae: 86.216 - mean_q: -105.311 Interval 5116 (2557500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9917 8 episodes - episode_reward: -186.316 [-295.497, -116.322] - loss: 46.411 - mae: 86.114 - mean_q: -104.654 Interval 5117 (2558000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4074 7 episodes - episode_reward: -173.974 [-250.038, -139.385] - loss: 43.083 - mae: 86.487 - mean_q: -104.307 Interval 5118 (2558500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2908 10 episodes - episode_reward: -161.802 [-216.364, -100.000] - loss: 51.193 - mae: 86.267 - mean_q: -104.385 Interval 5119 (2559000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4342 10 episodes - episode_reward: -179.878 [-234.390, -100.000] - loss: 43.251 - mae: 86.112 - mean_q: -105.434 Interval 5120 (2559500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.1964 8 episodes - episode_reward: -136.992 [-204.150, -39.791] - loss: 38.522 - mae: 86.257 - mean_q: -105.260 Interval 5121 (2560000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8267 7 episodes - episode_reward: -196.784 [-295.922, -155.765] - loss: 42.874 - mae: 85.950 - mean_q: -106.441 Interval 5122 (2560500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6953 8 episodes - episode_reward: -162.097 [-198.192, -108.980] - loss: 35.424 - mae: 85.778 - mean_q: -107.016 Interval 5123 (2561000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2911 9 episodes - episode_reward: -192.497 [-286.283, -119.641] - loss: 36.039 - mae: 85.982 - mean_q: -106.648 Interval 5124 (2561500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2523 9 episodes - episode_reward: -174.241 [-274.739, -104.374] - loss: 37.502 - mae: 85.984 - mean_q: -106.320 Interval 5125 (2562000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3904 7 episodes - episode_reward: -182.913 [-228.990, -109.965] - loss: 34.399 - mae: 85.883 - mean_q: -107.251 Interval 5126 (2562500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5103 7 episodes - episode_reward: -171.352 [-212.814, -128.448] - loss: 35.973 - mae: 85.672 - mean_q: -107.401 Interval 5127 (2563000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3241 9 episodes - episode_reward: -183.886 [-301.770, -128.420] - loss: 41.511 - mae: 85.876 - mean_q: -106.899 Interval 5128 (2563500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6961 9 episodes - episode_reward: -191.061 [-256.912, -120.128] - loss: 29.150 - mae: 85.774 - mean_q: -107.818 Interval 5129 (2564000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1393 7 episodes - episode_reward: -170.773 [-250.842, -94.262] - loss: 39.324 - mae: 85.965 - mean_q: -107.233 Interval 5130 (2564500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8166 7 episodes - episode_reward: -204.816 [-265.622, -162.061] - loss: 37.925 - mae: 85.917 - mean_q: -107.946 Interval 5131 (2565000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9009 7 episodes - episode_reward: -203.697 [-234.243, -154.569] - loss: 31.230 - mae: 85.866 - mean_q: -108.298 Interval 5132 (2565500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5061 11 episodes - episode_reward: -161.465 [-321.459, -22.415] - loss: 30.601 - mae: 85.842 - mean_q: -108.413 Interval 5133 (2566000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2912 6 episodes - episode_reward: -186.052 [-221.308, -138.373] - loss: 31.094 - mae: 85.969 - mean_q: -108.549 Interval 5134 (2566500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6729 7 episodes - episode_reward: -197.090 [-322.199, -70.950] - loss: 29.896 - mae: 86.205 - mean_q: -108.188 Interval 5135 (2567000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0716 9 episodes - episode_reward: -166.343 [-232.754, -101.103] - loss: 28.845 - mae: 86.111 - mean_q: -108.612 Interval 5136 (2567500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7409 8 episodes - episode_reward: -172.951 [-259.320, -100.000] - loss: 30.858 - mae: 85.945 - mean_q: -108.360 Interval 5137 (2568000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7332 9 episodes - episode_reward: -155.674 [-266.695, 5.233] - loss: 31.401 - mae: 86.075 - mean_q: -108.002 Interval 5138 (2568500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4373 9 episodes - episode_reward: -133.315 [-223.989, 30.273] - loss: 29.122 - mae: 86.086 - mean_q: -107.905 Interval 5139 (2569000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3382 8 episodes - episode_reward: -148.187 [-228.761, 2.181] - loss: 29.427 - mae: 86.207 - mean_q: -108.345 Interval 5140 (2569500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3429 8 episodes - episode_reward: -145.273 [-267.906, -39.335] - loss: 33.861 - mae: 86.149 - mean_q: -108.384 Interval 5141 (2570000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2185 8 episodes - episode_reward: -202.221 [-361.842, -100.000] - loss: 23.750 - mae: 86.537 - mean_q: -107.750 Interval 5142 (2570500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2739 9 episodes - episode_reward: -182.258 [-250.360, -108.998] - loss: 34.012 - mae: 86.477 - mean_q: -107.412 Interval 5143 (2571000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9629 7 episodes - episode_reward: -210.074 [-259.105, -119.877] - loss: 30.617 - mae: 86.406 - mean_q: -108.031 Interval 5144 (2571500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8474 8 episodes - episode_reward: -177.508 [-217.485, -100.000] - loss: 33.513 - mae: 86.534 - mean_q: -108.178 Interval 5145 (2572000 steps performed) 500/500 [==============================] - ETA: 0s - reward: -2.71 - 2s 4ms/step - reward: -2.6742 7 episodes - episode_reward: -183.662 [-302.722, -124.989] - loss: 32.935 - mae: 86.530 - mean_q: -107.998 Interval 5146 (2572500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7737 9 episodes - episode_reward: -161.601 [-227.103, -128.193] - loss: 35.557 - mae: 86.688 - mean_q: -107.331 Interval 5147 (2573000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4622 7 episodes - episode_reward: -170.556 [-326.100, -20.798] - loss: 32.495 - mae: 86.492 - mean_q: -108.058 Interval 5148 (2573500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5941 8 episodes - episode_reward: -166.216 [-263.222, -35.675] - loss: 29.852 - mae: 86.266 - mean_q: -108.139 Interval 5149 (2574000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1080 6 episodes - episode_reward: -160.517 [-217.316, -66.222] - loss: 30.902 - mae: 86.436 - mean_q: -107.667 Interval 5150 (2574500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3534 9 episodes - episode_reward: -194.220 [-323.952, -142.872] - loss: 26.580 - mae: 86.173 - mean_q: -108.088 Interval 5151 (2575000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3317 9 episodes - episode_reward: -183.091 [-279.663, -99.262] - loss: 30.629 - mae: 86.377 - mean_q: -108.106 Interval 5152 (2575500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5174 8 episodes - episode_reward: -157.440 [-248.494, 3.754] - loss: 32.653 - mae: 86.668 - mean_q: -107.369 Interval 5153 (2576000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2284 6 episodes - episode_reward: -168.942 [-304.326, -43.836] - loss: 30.204 - mae: 86.574 - mean_q: -107.585 Interval 5154 (2576500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5912 8 episodes - episode_reward: -179.310 [-241.923, -123.638] - loss: 31.531 - mae: 86.797 - mean_q: -106.440 Interval 5155 (2577000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6478 6 episodes - episode_reward: -219.250 [-302.498, -157.992] - loss: 38.769 - mae: 87.005 - mean_q: -105.736 Interval 5156 (2577500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3247 8 episodes - episode_reward: -208.220 [-283.870, -144.227] - loss: 32.109 - mae: 86.751 - mean_q: -106.758 Interval 5157 (2578000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1323 6 episodes - episode_reward: -161.929 [-250.303, -115.407] - loss: 27.568 - mae: 86.735 - mean_q: -106.652 Interval 5158 (2578500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0218 9 episodes - episode_reward: -178.247 [-294.452, -123.372] - loss: 29.053 - mae: 86.181 - mean_q: -108.395 Interval 5159 (2579000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0622 7 episodes - episode_reward: -216.903 [-304.838, -138.625] - loss: 31.688 - mae: 86.097 - mean_q: -108.460 Interval 5160 (2579500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7242 9 episodes - episode_reward: -153.693 [-235.060, -77.153] - loss: 29.754 - mae: 86.010 - mean_q: -108.571 Interval 5161 (2580000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6113 7 episodes - episode_reward: -179.044 [-278.501, -107.632] - loss: 26.541 - mae: 85.941 - mean_q: -108.998 Interval 5162 (2580500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5129 7 episodes - episode_reward: -173.116 [-205.061, -134.157] - loss: 26.903 - mae: 85.845 - mean_q: -108.660 Interval 5163 (2581000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4356 7 episodes - episode_reward: -184.066 [-266.075, -150.735] - loss: 27.999 - mae: 85.875 - mean_q: -109.619 Interval 5164 (2581500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4087 8 episodes - episode_reward: -154.486 [-294.063, 27.426] - loss: 25.683 - mae: 85.729 - mean_q: -109.471 Interval 5165 (2582000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4500 7 episodes - episode_reward: -174.720 [-221.205, -140.488] - loss: 20.208 - mae: 85.716 - mean_q: -109.514 Interval 5166 (2582500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1898 9 episodes - episode_reward: -173.698 [-367.008, -114.144] - loss: 23.715 - mae: 85.812 - mean_q: -109.457 Interval 5167 (2583000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4885 7 episodes - episode_reward: -174.560 [-261.347, -112.059] - loss: 21.915 - mae: 86.007 - mean_q: -109.730 Interval 5168 (2583500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2058 7 episodes - episode_reward: -164.966 [-220.436, -41.564] - loss: 24.339 - mae: 85.898 - mean_q: -109.966 Interval 5169 (2584000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8479 7 episodes - episode_reward: -196.525 [-315.067, -119.570] - loss: 16.611 - mae: 85.837 - mean_q: -110.454 Interval 5170 (2584500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2018 9 episodes - episode_reward: -176.893 [-259.390, -109.142] - loss: 25.864 - mae: 85.988 - mean_q: -110.200 Interval 5171 (2585000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5170 7 episodes - episode_reward: -132.415 [-281.584, 43.259] - loss: 20.184 - mae: 85.997 - mean_q: -110.387 Interval 5172 (2585500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6962 8 episodes - episode_reward: -147.016 [-238.524, 36.790] - loss: 20.505 - mae: 85.986 - mean_q: -110.643 Interval 5173 (2586000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7105 8 episodes - episode_reward: -174.745 [-272.932, -10.550] - loss: 25.359 - mae: 86.160 - mean_q: -110.435 Interval 5174 (2586500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.9275 8 episodes - episode_reward: -181.027 [-243.771, -136.714] - loss: 22.086 - mae: 86.057 - mean_q: -110.338 Interval 5175 (2587000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6569 7 episodes - episode_reward: -190.136 [-292.556, -130.752] - loss: 28.517 - mae: 86.098 - mean_q: -110.144 Interval 5176 (2587500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4013 9 episodes - episode_reward: -186.501 [-226.722, -142.460] - loss: 18.419 - mae: 86.186 - mean_q: -110.252 Interval 5177 (2588000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6218 7 episodes - episode_reward: -186.622 [-246.657, -92.668] - loss: 19.818 - mae: 86.144 - mean_q: -110.151 Interval 5178 (2588500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0844 6 episodes - episode_reward: -175.780 [-241.497, -1.084] - loss: 20.343 - mae: 86.084 - mean_q: -110.282 Interval 5179 (2589000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4149 7 episodes - episode_reward: -179.690 [-213.992, -123.417] - loss: 20.281 - mae: 86.121 - mean_q: -110.344 Interval 5180 (2589500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4512 7 episodes - episode_reward: -169.486 [-218.626, -127.896] - loss: 19.845 - mae: 86.221 - mean_q: -110.031 Interval 5181 (2590000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4084 8 episodes - episode_reward: -145.991 [-300.529, -34.131] - loss: 24.497 - mae: 86.249 - mean_q: -109.620 Interval 5182 (2590500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3196 8 episodes - episode_reward: -150.702 [-251.977, 67.242] - loss: 19.115 - mae: 86.194 - mean_q: -109.923 Interval 5183 (2591000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.7937 7 episodes - episode_reward: -186.451 [-223.161, -133.341] - loss: 32.999 - mae: 86.386 - mean_q: -109.623 Interval 5184 (2591500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0623 8 episodes - episode_reward: -188.147 [-256.891, -100.000] - loss: 22.454 - mae: 86.450 - mean_q: -109.486 Interval 5185 (2592000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0515 8 episodes - episode_reward: -198.517 [-252.153, -149.727] - loss: 24.509 - mae: 86.240 - mean_q: -109.453 Interval 5186 (2592500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0268 8 episodes - episode_reward: -196.122 [-414.483, 37.542] - loss: 38.096 - mae: 86.399 - mean_q: -109.071 Interval 5187 (2593000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0795 8 episodes - episode_reward: -195.433 [-266.434, -123.404] - loss: 27.554 - mae: 86.390 - mean_q: -109.200 Interval 5188 (2593500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3789 6 episodes - episode_reward: -181.806 [-298.912, -139.962] - loss: 23.548 - mae: 86.303 - mean_q: -109.249 Interval 5189 (2594000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7484 8 episodes - episode_reward: -180.354 [-211.179, -99.869] - loss: 21.181 - mae: 86.224 - mean_q: -109.444 Interval 5190 (2594500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.5346 4 episodes - episode_reward: -511.536 [-1124.666, -256.519] - loss: 25.834 - mae: 86.376 - mean_q: -109.214 Interval 5191 (2595000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3533 6 episodes - episode_reward: -232.317 [-364.998, -7.838] - loss: 27.798 - mae: 86.306 - mean_q: -109.950 Interval 5192 (2595500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5382 8 episodes - episode_reward: -152.183 [-245.448, -93.892] - loss: 28.976 - mae: 86.275 - mean_q: -110.079 Interval 5193 (2596000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6253 7 episodes - episode_reward: -200.180 [-255.734, -135.031] - loss: 28.261 - mae: 86.242 - mean_q: -110.031 Interval 5194 (2596500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4932 8 episodes - episode_reward: -155.371 [-234.176, 14.830] - loss: 20.801 - mae: 85.859 - mean_q: -111.308 Interval 5195 (2597000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1981 8 episodes - episode_reward: -200.026 [-245.211, -100.000] - loss: 22.377 - mae: 85.742 - mean_q: -111.611 Interval 5196 (2597500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9799 8 episodes - episode_reward: -179.176 [-235.477, -132.785] - loss: 21.756 - mae: 85.643 - mean_q: -111.486 Interval 5197 (2598000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8518 8 episodes - episode_reward: -179.333 [-257.713, -100.000] - loss: 20.611 - mae: 85.645 - mean_q: -111.568 Interval 5198 (2598500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5254 9 episodes - episode_reward: -192.740 [-300.811, -142.140] - loss: 17.146 - mae: 85.584 - mean_q: -111.730 Interval 5199 (2599000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5725 7 episodes - episode_reward: -184.752 [-257.127, -107.632] - loss: 9.265 - mae: 85.531 - mean_q: -111.667 Interval 5200 (2599500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0943 7 episodes - episode_reward: -155.759 [-201.198, -100.000] - loss: 19.058 - mae: 85.570 - mean_q: -111.920 Interval 5201 (2600000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6041 9 episodes - episode_reward: -147.648 [-221.910, 6.491] - loss: 18.999 - mae: 85.512 - mean_q: -111.779 Interval 5202 (2600500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4589 9 episodes - episode_reward: -180.230 [-253.491, -100.000] - loss: 17.395 - mae: 85.476 - mean_q: -111.813 Interval 5203 (2601000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7142 8 episodes - episode_reward: -181.220 [-234.122, -112.087] - loss: 15.960 - mae: 85.483 - mean_q: -111.739 Interval 5204 (2601500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6914 7 episodes - episode_reward: -191.172 [-227.296, -121.664] - loss: 18.627 - mae: 85.434 - mean_q: -111.677 Interval 5205 (2602000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0053 8 episodes - episode_reward: -189.385 [-291.845, -100.000] - loss: 21.688 - mae: 85.447 - mean_q: -111.528 Interval 5206 (2602500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2104 8 episodes - episode_reward: -200.847 [-392.212, -143.022] - loss: 15.039 - mae: 85.403 - mean_q: -111.711 Interval 5207 (2603000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3359 7 episodes - episode_reward: -162.148 [-213.109, -77.911] - loss: 16.233 - mae: 85.424 - mean_q: -111.797 Interval 5208 (2603500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8474 8 episodes - episode_reward: -183.667 [-259.569, -143.989] - loss: 17.519 - mae: 85.394 - mean_q: -111.815 Interval 5209 (2604000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3976 8 episodes - episode_reward: -146.259 [-327.340, 28.516] - loss: 16.468 - mae: 85.347 - mean_q: -111.756 Interval 5210 (2604500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0590 6 episodes - episode_reward: -251.046 [-426.732, -182.682] - loss: 10.236 - mae: 85.302 - mean_q: -111.797 Interval 5211 (2605000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6521 9 episodes - episode_reward: -156.895 [-217.913, -100.000] - loss: 19.919 - mae: 85.335 - mean_q: -111.705 Interval 5212 (2605500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6114 8 episodes - episode_reward: -155.501 [-214.149, -100.000] - loss: 17.166 - mae: 85.297 - mean_q: -111.812 Interval 5213 (2606000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8304 8 episodes - episode_reward: -173.166 [-232.670, -115.369] - loss: 11.838 - mae: 85.219 - mean_q: -111.810 Interval 5214 (2606500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8389 8 episodes - episode_reward: -179.231 [-233.847, -119.051] - loss: 17.176 - mae: 85.189 - mean_q: -111.784 Interval 5215 (2607000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8511 10 episodes - episode_reward: -146.770 [-261.062, 21.901] - loss: 16.124 - mae: 85.117 - mean_q: -111.756 Interval 5216 (2607500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5478 7 episodes - episode_reward: -173.902 [-230.289, -114.948] - loss: 12.244 - mae: 85.050 - mean_q: -111.740 Interval 5217 (2608000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4701 8 episodes - episode_reward: -162.397 [-201.552, -72.954] - loss: 11.322 - mae: 84.980 - mean_q: -111.728 Interval 5218 (2608500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0187 5 episodes - episode_reward: -189.269 [-227.328, -167.488] - loss: 14.250 - mae: 84.919 - mean_q: -111.731 Interval 5219 (2609000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6190 9 episodes - episode_reward: -150.426 [-227.635, -9.288] - loss: 13.204 - mae: 84.841 - mean_q: -111.727 Interval 5220 (2609500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6361 9 episodes - episode_reward: -188.381 [-410.197, -86.658] - loss: 11.112 - mae: 84.783 - mean_q: -111.717 Interval 5221 (2610000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0884 4 episodes - episode_reward: -481.278 [-675.492, -276.910] - loss: 12.081 - mae: 84.734 - mean_q: -111.711 Interval 5222 (2610500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8274 8 episodes - episode_reward: -202.667 [-490.975, -104.419] - loss: 13.516 - mae: 84.715 - mean_q: -111.709 Interval 5223 (2611000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1661 9 episodes - episode_reward: -181.997 [-286.014, -123.544] - loss: 11.987 - mae: 84.687 - mean_q: -111.693 Interval 5224 (2611500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6793 8 episodes - episode_reward: -158.483 [-201.042, -111.832] - loss: 15.365 - mae: 84.668 - mean_q: -111.676 Interval 5225 (2612000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7575 7 episodes - episode_reward: -215.507 [-244.329, -164.550] - loss: 12.005 - mae: 84.616 - mean_q: -111.640 Interval 5226 (2612500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3384 8 episodes - episode_reward: -252.388 [-546.166, -148.152] - loss: 14.420 - mae: 84.593 - mean_q: -111.629 Interval 5227 (2613000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7338 7 episodes - episode_reward: -201.586 [-265.338, -144.192] - loss: 13.077 - mae: 84.550 - mean_q: -111.626 Interval 5228 (2613500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.9976 8 episodes - episode_reward: -122.423 [-179.550, 59.056] - loss: 13.035 - mae: 84.533 - mean_q: -111.616 Interval 5229 (2614000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9297 8 episodes - episode_reward: -189.562 [-329.497, -126.763] - loss: 15.135 - mae: 84.506 - mean_q: -111.607 Interval 5230 (2614500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6770 7 episodes - episode_reward: -191.341 [-248.841, -149.300] - loss: 10.648 - mae: 84.451 - mean_q: -111.585 Interval 5231 (2615000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7354 8 episodes - episode_reward: -171.155 [-227.275, -129.507] - loss: 15.110 - mae: 84.434 - mean_q: -111.564 Interval 5232 (2615500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8198 7 episodes - episode_reward: -198.484 [-254.222, -158.280] - loss: 13.411 - mae: 84.393 - mean_q: -111.551 Interval 5233 (2616000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9731 7 episodes - episode_reward: -202.697 [-291.647, -150.212] - loss: 13.937 - mae: 84.370 - mean_q: -111.523 Interval 5234 (2616500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8958 12 episodes - episode_reward: -169.626 [-244.948, -100.000] - loss: 13.125 - mae: 84.326 - mean_q: -111.521 Interval 5235 (2617000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7704 7 episodes - episode_reward: -188.623 [-371.205, -108.146] - loss: 12.386 - mae: 84.309 - mean_q: -111.503 Interval 5236 (2617500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9849 9 episodes - episode_reward: -172.541 [-239.142, -78.955] - loss: 11.261 - mae: 84.274 - mean_q: -111.476 Interval 5237 (2618000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.4787 6 episodes - episode_reward: -193.394 [-234.760, -160.660] - loss: 13.167 - mae: 84.245 - mean_q: -111.488 Interval 5238 (2618500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9256 9 episodes - episode_reward: -171.222 [-226.101, -100.000] - loss: 15.510 - mae: 84.241 - mean_q: -111.453 Interval 5239 (2619000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7626 9 episodes - episode_reward: -153.532 [-213.403, -89.359] - loss: 11.142 - mae: 84.195 - mean_q: -111.454 Interval 5240 (2619500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4692 6 episodes - episode_reward: -189.658 [-233.051, -153.568] - loss: 10.458 - mae: 84.180 - mean_q: -111.473 Interval 5241 (2620000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7577 9 episodes - episode_reward: -160.217 [-239.933, -55.154] - loss: 12.027 - mae: 84.181 - mean_q: -111.476 Interval 5242 (2620500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9879 7 episodes - episode_reward: -201.702 [-243.296, -147.004] - loss: 12.625 - mae: 84.169 - mean_q: -111.473 Interval 5243 (2621000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7866 8 episodes - episode_reward: -246.012 [-556.987, -157.294] - loss: 9.299 - mae: 84.154 - mean_q: -111.486 Interval 5244 (2621500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2338 9 episodes - episode_reward: -185.639 [-258.646, -134.275] - loss: 11.160 - mae: 84.151 - mean_q: -111.477 Interval 5245 (2622000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2694 7 episodes - episode_reward: -219.879 [-331.238, -177.095] - loss: 8.519 - mae: 84.131 - mean_q: -111.513 Interval 5246 (2622500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8544 9 episodes - episode_reward: -163.543 [-221.979, -46.010] - loss: 14.401 - mae: 84.158 - mean_q: -111.528 Interval 5247 (2623000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4248 8 episodes - episode_reward: -155.012 [-266.490, -85.142] - loss: 12.013 - mae: 84.141 - mean_q: -111.517 Interval 5248 (2623500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1886 9 episodes - episode_reward: -171.136 [-239.835, -100.000] - loss: 11.795 - mae: 84.131 - mean_q: -111.510 Interval 5249 (2624000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.1108 8 episodes - episode_reward: -310.591 [-980.822, -151.906] - loss: 10.959 - mae: 84.110 - mean_q: -111.522 Interval 5250 (2624500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5290 9 episodes - episode_reward: -266.101 [-630.046, -121.464] - loss: 15.947 - mae: 84.134 - mean_q: -111.516 Interval 5251 (2625000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9477 7 episodes - episode_reward: -206.168 [-263.719, -181.277] - loss: 11.250 - mae: 84.132 - mean_q: -111.526 Interval 5252 (2625500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6419 7 episodes - episode_reward: -186.146 [-227.511, -159.774] - loss: 12.080 - mae: 84.158 - mean_q: -111.547 Interval 5253 (2626000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4059 7 episodes - episode_reward: -188.370 [-245.510, -109.488] - loss: 11.105 - mae: 84.161 - mean_q: -111.549 Interval 5254 (2626500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4816 9 episodes - episode_reward: -182.104 [-271.026, -40.899] - loss: 12.519 - mae: 84.186 - mean_q: -111.538 Interval 5255 (2627000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9230 8 episodes - episode_reward: -183.969 [-274.498, -119.222] - loss: 11.284 - mae: 84.194 - mean_q: -111.552 Interval 5256 (2627500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9290 7 episodes - episode_reward: -195.612 [-236.518, -142.018] - loss: 10.618 - mae: 84.197 - mean_q: -111.592 Interval 5257 (2628000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9849 10 episodes - episode_reward: -156.952 [-236.949, -99.238] - loss: 14.791 - mae: 84.222 - mean_q: -111.596 Interval 5258 (2628500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8879 12 episodes - episode_reward: -162.364 [-284.069, -100.000] - loss: 12.262 - mae: 84.238 - mean_q: -111.588 Interval 5259 (2629000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8875 7 episodes - episode_reward: -201.550 [-273.590, -147.326] - loss: 17.376 - mae: 84.276 - mean_q: -111.556 Interval 5260 (2629500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0629 9 episodes - episode_reward: -173.716 [-231.409, -130.309] - loss: 13.169 - mae: 84.250 - mean_q: -111.541 Interval 5261 (2630000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7822 8 episodes - episode_reward: -181.373 [-226.443, -129.120] - loss: 9.676 - mae: 84.271 - mean_q: -111.557 Interval 5262 (2630500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1124 9 episodes - episode_reward: -226.416 [-521.824, -144.167] - loss: 9.230 - mae: 84.271 - mean_q: -111.579 Interval 5263 (2631000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0113 7 episodes - episode_reward: -209.984 [-464.567, -100.000] - loss: 13.036 - mae: 84.299 - mean_q: -111.580 Interval 5264 (2631500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6002 8 episodes - episode_reward: -162.188 [-226.656, -98.922] - loss: 12.597 - mae: 84.318 - mean_q: -111.580 Interval 5265 (2632000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1580 7 episodes - episode_reward: -219.262 [-514.602, -138.617] - loss: 12.095 - mae: 84.320 - mean_q: -111.571 Interval 5266 (2632500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9551 9 episodes - episode_reward: -170.357 [-232.799, -113.203] - loss: 13.334 - mae: 84.344 - mean_q: -111.564 Interval 5267 (2633000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0371 9 episodes - episode_reward: -164.937 [-202.647, -121.537] - loss: 10.855 - mae: 84.329 - mean_q: -111.564 Interval 5268 (2633500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0697 9 episodes - episode_reward: -173.460 [-263.865, -30.344] - loss: 15.336 - mae: 84.354 - mean_q: -111.561 Interval 5269 (2634000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9755 7 episodes - episode_reward: -208.903 [-344.644, -137.941] - loss: 11.081 - mae: 84.350 - mean_q: -111.572 Interval 5270 (2634500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8867 7 episodes - episode_reward: -207.986 [-299.989, -160.371] - loss: 15.205 - mae: 84.384 - mean_q: -111.590 Interval 5271 (2635000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6865 8 episodes - episode_reward: -168.349 [-232.303, -115.092] - loss: 12.219 - mae: 84.383 - mean_q: -111.575 Interval 5272 (2635500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6071 8 episodes - episode_reward: -168.708 [-218.184, -11.126] - loss: 11.679 - mae: 84.385 - mean_q: -111.596 Interval 5273 (2636000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3218 9 episodes - episode_reward: -182.105 [-276.029, -140.922] - loss: 11.712 - mae: 84.379 - mean_q: -111.624 Interval 5274 (2636500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6493 8 episodes - episode_reward: -160.557 [-211.649, -103.599] - loss: 9.834 - mae: 84.374 - mean_q: -111.667 Interval 5275 (2637000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4699 10 episodes - episode_reward: -178.837 [-227.138, -119.875] - loss: 11.090 - mae: 84.392 - mean_q: -111.690 Interval 5276 (2637500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4635 10 episodes - episode_reward: -174.270 [-252.282, -127.979] - loss: 9.900 - mae: 84.411 - mean_q: -111.720 Interval 5277 (2638000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9433 8 episodes - episode_reward: -184.242 [-232.252, -143.063] - loss: 8.814 - mae: 84.416 - mean_q: -111.727 Interval 5278 (2638500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7318 7 episodes - episode_reward: -187.716 [-238.573, -109.129] - loss: 10.790 - mae: 84.449 - mean_q: -111.780 Interval 5279 (2639000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0602 7 episodes - episode_reward: -151.577 [-216.500, -14.692] - loss: 9.247 - mae: 84.455 - mean_q: -111.796 Interval 5280 (2639500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5527 7 episodes - episode_reward: -173.064 [-209.338, -131.371] - loss: 11.687 - mae: 84.493 - mean_q: -111.816 Interval 5281 (2640000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0042 8 episodes - episode_reward: -196.744 [-303.145, -100.000] - loss: 7.677 - mae: 84.492 - mean_q: -111.870 Interval 5282 (2640500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0006 9 episodes - episode_reward: -167.388 [-265.100, -84.956] - loss: 9.698 - mae: 84.528 - mean_q: -111.897 Interval 5283 (2641000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3866 9 episodes - episode_reward: -187.860 [-272.555, -100.000] - loss: 8.444 - mae: 84.538 - mean_q: -111.950 Interval 5284 (2641500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7470 7 episodes - episode_reward: -199.152 [-240.395, -145.382] - loss: 11.361 - mae: 84.574 - mean_q: -111.989 Interval 5285 (2642000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3880 5 episodes - episode_reward: -205.030 [-402.198, -64.178] - loss: 9.135 - mae: 84.592 - mean_q: -112.019 Interval 5286 (2642500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0375 8 episodes - episode_reward: -204.918 [-402.282, -85.389] - loss: 9.022 - mae: 84.601 - mean_q: -112.063 Interval 5287 (2643000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9847 5 episodes - episode_reward: -304.988 [-566.703, -163.788] - loss: 8.466 - mae: 84.628 - mean_q: -112.089 Interval 5288 (2643500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8811 7 episodes - episode_reward: -135.032 [-184.829, -55.515] - loss: 11.900 - mae: 84.663 - mean_q: -112.114 Interval 5289 (2644000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.0617 6 episodes - episode_reward: -368.603 [-610.649, -117.053] - loss: 10.185 - mae: 84.684 - mean_q: -112.129 Interval 5290 (2644500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1158 8 episodes - episode_reward: -234.770 [-452.674, -147.333] - loss: 10.989 - mae: 84.715 - mean_q: -112.141 Interval 5291 (2645000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4437 7 episodes - episode_reward: -165.382 [-202.305, -100.000] - loss: 7.604 - mae: 84.733 - mean_q: -112.148 Interval 5292 (2645500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9939 8 episodes - episode_reward: -191.203 [-243.541, -147.103] - loss: 8.875 - mae: 84.738 - mean_q: -112.179 Interval 5293 (2646000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1648 9 episodes - episode_reward: -172.906 [-253.911, -100.000] - loss: 10.499 - mae: 84.766 - mean_q: -112.213 Interval 5294 (2646500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7731 7 episodes - episode_reward: -203.127 [-363.334, -149.087] - loss: 11.392 - mae: 84.795 - mean_q: -112.226 Interval 5295 (2647000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4599 9 episodes - episode_reward: -191.843 [-254.495, -131.470] - loss: 11.980 - mae: 84.824 - mean_q: -112.238 Interval 5296 (2647500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5600 7 episodes - episode_reward: -183.774 [-272.509, -93.896] - loss: 14.069 - mae: 84.840 - mean_q: -112.238 Interval 5297 (2648000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0052 7 episodes - episode_reward: -209.024 [-266.733, -143.668] - loss: 12.708 - mae: 84.848 - mean_q: -112.231 Interval 5298 (2648500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2018 10 episodes - episode_reward: -164.473 [-228.120, -112.060] - loss: 8.313 - mae: 84.847 - mean_q: -112.243 Interval 5299 (2649000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2004 7 episodes - episode_reward: -233.997 [-358.169, -27.579] - loss: 14.787 - mae: 84.890 - mean_q: -112.261 Interval 5300 (2649500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.7269 7 episodes - episode_reward: -188.296 [-321.052, -111.095] - loss: 10.713 - mae: 84.870 - mean_q: -112.243 Interval 5301 (2650000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3770 7 episodes - episode_reward: -165.069 [-210.472, -82.721] - loss: 12.110 - mae: 84.897 - mean_q: -112.255 Interval 5302 (2650500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7951 8 episodes - episode_reward: -179.030 [-262.641, -146.269] - loss: 9.651 - mae: 84.894 - mean_q: -112.276 Interval 5303 (2651000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9432 7 episodes - episode_reward: -192.723 [-250.854, -109.035] - loss: 10.542 - mae: 84.886 - mean_q: -112.314 Interval 5304 (2651500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2915 8 episodes - episode_reward: -208.997 [-275.488, -112.106] - loss: 13.001 - mae: 84.894 - mean_q: -112.338 Interval 5305 (2652000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0651 9 episodes - episode_reward: -179.218 [-258.872, -100.000] - loss: 8.943 - mae: 84.897 - mean_q: -112.355 Interval 5306 (2652500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9182 6 episodes - episode_reward: -145.426 [-223.094, 17.691] - loss: 10.728 - mae: 84.937 - mean_q: -112.399 Interval 5307 (2653000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0410 10 episodes - episode_reward: -164.254 [-280.053, -100.000] - loss: 12.446 - mae: 84.973 - mean_q: -112.427 Interval 5308 (2653500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1427 9 episodes - episode_reward: -174.630 [-252.057, -75.602] - loss: 10.324 - mae: 84.986 - mean_q: -112.438 Interval 5309 (2654000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.0406 6 episodes - episode_reward: -169.591 [-328.843, -27.114] - loss: 9.856 - mae: 85.007 - mean_q: -112.467 Interval 5310 (2654500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1974 8 episodes - episode_reward: -197.296 [-300.612, -132.156] - loss: 11.489 - mae: 85.049 - mean_q: -112.491 Interval 5311 (2655000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2456 8 episodes - episode_reward: -201.488 [-263.582, -117.788] - loss: 9.911 - mae: 85.051 - mean_q: -112.514 Interval 5312 (2655500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7083 7 episodes - episode_reward: -192.431 [-260.671, -106.913] - loss: 9.635 - mae: 85.070 - mean_q: -112.541 Interval 5313 (2656000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9478 6 episodes - episode_reward: -163.398 [-232.948, -83.428] - loss: 11.888 - mae: 85.095 - mean_q: -112.550 Interval 5314 (2656500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3109 10 episodes - episode_reward: -165.681 [-198.297, -133.685] - loss: 11.786 - mae: 85.094 - mean_q: -112.567 Interval 5315 (2657000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5200 7 episodes - episode_reward: -178.915 [-252.734, -141.419] - loss: 10.449 - mae: 85.121 - mean_q: -112.592 Interval 5316 (2657500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0401 7 episodes - episode_reward: -215.219 [-280.326, -182.494] - loss: 13.187 - mae: 85.137 - mean_q: -112.611 Interval 5317 (2658000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8502 8 episodes - episode_reward: -173.497 [-232.205, -127.862] - loss: 11.787 - mae: 85.159 - mean_q: -112.635 Interval 5318 (2658500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8601 8 episodes - episode_reward: -187.855 [-280.298, -139.517] - loss: 9.301 - mae: 85.157 - mean_q: -112.663 Interval 5319 (2659000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3072 8 episodes - episode_reward: -208.567 [-300.863, -133.473] - loss: 11.226 - mae: 85.198 - mean_q: -112.699 Interval 5320 (2659500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1866 7 episodes - episode_reward: -150.162 [-226.375, -29.156] - loss: 8.926 - mae: 85.197 - mean_q: -112.722 Interval 5321 (2660000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5110 6 episodes - episode_reward: -197.795 [-362.513, -0.604] - loss: 9.253 - mae: 85.201 - mean_q: -112.763 Interval 5322 (2660500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.2379 9 episodes - episode_reward: -350.581 [-641.965, -117.877] - loss: 10.956 - mae: 85.228 - mean_q: -112.787 Interval 5323 (2661000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8830 6 episodes - episode_reward: -325.756 [-623.423, -191.447] - loss: 12.640 - mae: 85.274 - mean_q: -112.822 Interval 5324 (2661500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7906 7 episodes - episode_reward: -205.764 [-241.632, -166.367] - loss: 11.156 - mae: 85.328 - mean_q: -112.814 Interval 5325 (2662000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0010 6 episodes - episode_reward: -157.412 [-225.365, -40.977] - loss: 11.183 - mae: 85.349 - mean_q: -112.814 Interval 5326 (2662500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1828 7 episodes - episode_reward: -164.913 [-270.303, 16.317] - loss: 14.112 - mae: 85.385 - mean_q: -112.829 Interval 5327 (2663000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4637 8 episodes - episode_reward: -154.875 [-224.693, -100.000] - loss: 9.575 - mae: 85.384 - mean_q: -112.838 Interval 5328 (2663500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2411 6 episodes - episode_reward: -171.136 [-277.464, -54.410] - loss: 11.547 - mae: 85.419 - mean_q: -112.852 Interval 5329 (2664000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5857 8 episodes - episode_reward: -171.470 [-225.894, 4.849] - loss: 9.912 - mae: 85.424 - mean_q: -112.888 Interval 5330 (2664500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8927 8 episodes - episode_reward: -182.626 [-257.753, -100.000] - loss: 11.648 - mae: 85.453 - mean_q: -112.912 Interval 5331 (2665000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2039 6 episodes - episode_reward: -184.719 [-231.493, -95.080] - loss: 10.544 - mae: 85.471 - mean_q: -112.901 Interval 5332 (2665500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5128 8 episodes - episode_reward: -156.870 [-228.514, -18.220] - loss: 12.370 - mae: 85.487 - mean_q: -112.910 Interval 5333 (2666000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0158 6 episodes - episode_reward: -230.228 [-313.809, -134.156] - loss: 11.299 - mae: 85.513 - mean_q: -112.905 Interval 5334 (2666500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9658 9 episodes - episode_reward: -173.632 [-265.913, -100.000] - loss: 10.724 - mae: 85.534 - mean_q: -112.919 Interval 5335 (2667000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5139 10 episodes - episode_reward: -180.068 [-267.047, -126.567] - loss: 12.133 - mae: 85.546 - mean_q: -112.935 Interval 5336 (2667500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8462 7 episodes - episode_reward: -198.899 [-270.277, -122.816] - loss: 11.037 - mae: 85.564 - mean_q: -112.934 Interval 5337 (2668000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0445 7 episodes - episode_reward: -134.151 [-203.009, -85.997] - loss: 10.407 - mae: 85.580 - mean_q: -112.956 Interval 5338 (2668500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9958 10 episodes - episode_reward: -158.817 [-218.460, -30.302] - loss: 14.482 - mae: 85.618 - mean_q: -112.938 Interval 5339 (2669000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0580 9 episodes - episode_reward: -112.681 [-237.650, 41.103] - loss: 10.947 - mae: 85.616 - mean_q: -112.920 Interval 5340 (2669500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4656 7 episodes - episode_reward: -174.697 [-210.949, -126.502] - loss: 9.834 - mae: 85.614 - mean_q: -112.938 Interval 5341 (2670000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9449 6 episodes - episode_reward: -161.537 [-284.393, -5.115] - loss: 12.805 - mae: 85.643 - mean_q: -112.928 Interval 5342 (2670500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8364 9 episodes - episode_reward: -213.613 [-314.666, -100.000] - loss: 11.738 - mae: 85.647 - mean_q: -112.923 Interval 5343 (2671000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1580 7 episodes - episode_reward: -165.809 [-285.536, -13.104] - loss: 16.555 - mae: 85.653 - mean_q: -112.892 Interval 5344 (2671500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8749 7 episodes - episode_reward: -196.084 [-231.891, -129.747] - loss: 13.464 - mae: 85.634 - mean_q: -112.863 Interval 5345 (2672000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7894 8 episodes - episode_reward: -167.588 [-322.374, -0.445] - loss: 13.205 - mae: 85.610 - mean_q: -112.837 Interval 5346 (2672500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2631 10 episodes - episode_reward: -174.432 [-230.499, -109.960] - loss: 12.003 - mae: 85.604 - mean_q: -112.835 Interval 5347 (2673000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2152 9 episodes - episode_reward: -175.800 [-279.002, -121.848] - loss: 10.017 - mae: 85.589 - mean_q: -112.824 Interval 5348 (2673500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2023 6 episodes - episode_reward: -175.639 [-273.586, 5.624] - loss: 11.754 - mae: 85.581 - mean_q: -112.821 Interval 5349 (2674000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8076 7 episodes - episode_reward: -196.471 [-262.548, -149.037] - loss: 14.715 - mae: 85.569 - mean_q: -112.807 Interval 5350 (2674500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0077 9 episodes - episode_reward: -173.631 [-282.398, -48.666] - loss: 11.342 - mae: 85.525 - mean_q: -112.784 Interval 5351 (2675000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3027 6 episodes - episode_reward: -183.455 [-250.784, -153.253] - loss: 11.303 - mae: 85.495 - mean_q: -112.774 Interval 5352 (2675500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4465 8 episodes - episode_reward: -159.862 [-249.030, -100.000] - loss: 11.089 - mae: 85.471 - mean_q: -112.772 Interval 5353 (2676000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2200 7 episodes - episode_reward: -151.112 [-233.308, -107.320] - loss: 12.572 - mae: 85.450 - mean_q: -112.760 Interval 5354 (2676500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3245 10 episodes - episode_reward: -172.324 [-239.339, -109.439] - loss: 11.167 - mae: 85.426 - mean_q: -112.723 Interval 5355 (2677000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2713 8 episodes - episode_reward: -197.375 [-279.840, -132.337] - loss: 12.902 - mae: 85.406 - mean_q: -112.693 Interval 5356 (2677500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5701 7 episodes - episode_reward: -186.570 [-225.164, -133.129] - loss: 9.400 - mae: 85.374 - mean_q: -112.679 Interval 5357 (2678000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1736 9 episodes - episode_reward: -180.016 [-260.766, -100.000] - loss: 13.896 - mae: 85.366 - mean_q: -112.669 Interval 5358 (2678500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7550 8 episodes - episode_reward: -170.383 [-253.088, -93.949] - loss: 10.404 - mae: 85.336 - mean_q: -112.665 Interval 5359 (2679000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4317 7 episodes - episode_reward: -164.191 [-243.292, -6.834] - loss: 9.169 - mae: 85.300 - mean_q: -112.689 Interval 5360 (2679500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5545 7 episodes - episode_reward: -187.646 [-215.129, -143.486] - loss: 11.393 - mae: 85.292 - mean_q: -112.701 Interval 5361 (2680000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3130 7 episodes - episode_reward: -162.912 [-258.778, -128.689] - loss: 9.526 - mae: 85.270 - mean_q: -112.701 Interval 5362 (2680500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4165 8 episodes - episode_reward: -150.387 [-277.072, 17.381] - loss: 12.290 - mae: 85.267 - mean_q: -112.719 Interval 5363 (2681000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9449 9 episodes - episode_reward: -170.964 [-271.725, -106.501] - loss: 13.807 - mae: 85.265 - mean_q: -112.680 Interval 5364 (2681500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1511 7 episodes - episode_reward: -154.003 [-176.798, -100.687] - loss: 9.635 - mae: 85.212 - mean_q: -112.665 Interval 5365 (2682000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9572 8 episodes - episode_reward: -183.037 [-266.060, -116.992] - loss: 11.418 - mae: 85.198 - mean_q: -112.671 Interval 5366 (2682500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0947 7 episodes - episode_reward: -218.551 [-313.670, -167.996] - loss: 14.738 - mae: 85.208 - mean_q: -112.648 Interval 5367 (2683000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7402 7 episodes - episode_reward: -194.969 [-283.174, -153.755] - loss: 13.190 - mae: 85.176 - mean_q: -112.607 Interval 5368 (2683500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3124 7 episodes - episode_reward: -161.038 [-219.558, -92.482] - loss: 13.040 - mae: 85.136 - mean_q: -112.571 Interval 5369 (2684000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8490 10 episodes - episode_reward: -190.116 [-261.275, -125.249] - loss: 13.068 - mae: 85.123 - mean_q: -112.538 Interval 5370 (2684500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8524 10 episodes - episode_reward: -151.914 [-200.024, -85.901] - loss: 14.612 - mae: 85.111 - mean_q: -112.506 Interval 5371 (2685000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3981 7 episodes - episode_reward: -154.179 [-243.215, -111.868] - loss: 12.516 - mae: 85.094 - mean_q: -112.441 Interval 5372 (2685500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2591 7 episodes - episode_reward: -179.097 [-246.467, -119.198] - loss: 13.340 - mae: 85.067 - mean_q: -112.418 Interval 5373 (2686000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8773 7 episodes - episode_reward: -197.861 [-257.749, -133.375] - loss: 11.444 - mae: 85.038 - mean_q: -112.391 Interval 5374 (2686500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6494 8 episodes - episode_reward: -172.784 [-257.201, -123.235] - loss: 13.137 - mae: 85.003 - mean_q: -112.382 Interval 5375 (2687000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6663 7 episodes - episode_reward: -184.352 [-245.965, -114.256] - loss: 15.397 - mae: 85.001 - mean_q: -112.339 Interval 5376 (2687500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3714 7 episodes - episode_reward: -230.502 [-408.208, -114.914] - loss: 13.868 - mae: 84.966 - mean_q: -112.327 Interval 5377 (2688000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6528 8 episodes - episode_reward: -172.598 [-239.243, -100.000] - loss: 11.240 - mae: 84.954 - mean_q: -112.301 Interval 5378 (2688500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9180 8 episodes - episode_reward: -187.772 [-261.856, -110.118] - loss: 14.425 - mae: 84.954 - mean_q: -112.265 Interval 5379 (2689000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7174 7 episodes - episode_reward: -184.634 [-254.185, -122.285] - loss: 12.588 - mae: 84.926 - mean_q: -112.261 Interval 5380 (2689500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8046 9 episodes - episode_reward: -163.590 [-185.117, -107.346] - loss: 16.488 - mae: 84.920 - mean_q: -112.217 Interval 5381 (2690000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1934 9 episodes - episode_reward: -170.806 [-238.685, -100.000] - loss: 10.617 - mae: 84.877 - mean_q: -112.187 Interval 5382 (2690500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3974 9 episodes - episode_reward: -142.909 [-202.631, -44.481] - loss: 13.827 - mae: 84.877 - mean_q: -112.191 Interval 5383 (2691000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8816 7 episodes - episode_reward: -195.487 [-248.250, -126.005] - loss: 14.100 - mae: 84.872 - mean_q: -112.159 Interval 5384 (2691500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5830 8 episodes - episode_reward: -162.724 [-250.007, 39.443] - loss: 15.459 - mae: 84.849 - mean_q: -112.118 Interval 5385 (2692000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5002 9 episodes - episode_reward: -194.552 [-317.195, -148.135] - loss: 12.269 - mae: 84.827 - mean_q: -112.087 Interval 5386 (2692500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0945 8 episodes - episode_reward: -197.168 [-368.094, -145.205] - loss: 11.998 - mae: 84.796 - mean_q: -112.070 Interval 5387 (2693000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0848 7 episodes - episode_reward: -139.892 [-195.561, -52.165] - loss: 12.772 - mae: 84.785 - mean_q: -112.050 Interval 5388 (2693500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0498 9 episodes - episode_reward: -172.754 [-292.633, -72.677] - loss: 11.905 - mae: 84.763 - mean_q: -112.032 Interval 5389 (2694000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4979 8 episodes - episode_reward: -215.616 [-346.239, -137.489] - loss: 10.644 - mae: 84.733 - mean_q: -112.031 Interval 5390 (2694500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.0647 7 episodes - episode_reward: -149.434 [-213.557, 66.043] - loss: 10.917 - mae: 84.711 - mean_q: -112.024 Interval 5391 (2695000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5935 9 episodes - episode_reward: -148.490 [-175.992, -111.698] - loss: 12.242 - mae: 84.683 - mean_q: -112.037 Interval 5392 (2695500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5747 7 episodes - episode_reward: -182.079 [-228.217, -122.345] - loss: 12.951 - mae: 84.673 - mean_q: -112.002 Interval 5393 (2696000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3085 7 episodes - episode_reward: -168.133 [-207.894, -138.270] - loss: 12.630 - mae: 84.622 - mean_q: -111.977 Interval 5394 (2696500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8995 7 episodes - episode_reward: -194.427 [-259.522, -127.136] - loss: 14.807 - mae: 84.607 - mean_q: -111.940 Interval 5395 (2697000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2856 9 episodes - episode_reward: -186.506 [-251.274, -106.771] - loss: 10.758 - mae: 84.578 - mean_q: -111.913 Interval 5396 (2697500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2317 8 episodes - episode_reward: -203.393 [-283.187, -143.208] - loss: 17.704 - mae: 84.567 - mean_q: -111.874 Interval 5397 (2698000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0741 7 episodes - episode_reward: -153.456 [-196.901, -81.153] - loss: 15.320 - mae: 84.532 - mean_q: -111.815 Interval 5398 (2698500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3027 7 episodes - episode_reward: -159.994 [-232.979, -27.465] - loss: 12.558 - mae: 84.506 - mean_q: -111.790 Interval 5399 (2699000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5446 7 episodes - episode_reward: -186.951 [-235.680, -158.938] - loss: 10.686 - mae: 84.478 - mean_q: -111.780 Interval 5400 (2699500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3150 9 episodes - episode_reward: -173.770 [-255.207, -100.000] - loss: 9.619 - mae: 84.435 - mean_q: -111.761 Interval 5401 (2700000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4743 7 episodes - episode_reward: -182.632 [-253.858, -129.364] - loss: 11.488 - mae: 84.415 - mean_q: -111.757 Interval 5402 (2700500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5560 6 episodes - episode_reward: -213.889 [-254.026, -192.478] - loss: 11.183 - mae: 84.422 - mean_q: -111.731 Interval 5403 (2701000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8732 10 episodes - episode_reward: -191.140 [-266.584, -100.000] - loss: 10.918 - mae: 84.396 - mean_q: -111.686 Interval 5404 (2701500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2767 10 episodes - episode_reward: -171.254 [-285.548, -90.416] - loss: 14.144 - mae: 84.377 - mean_q: -111.659 Interval 5405 (2702000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3837 7 episodes - episode_reward: -166.225 [-213.644, -109.211] - loss: 11.198 - mae: 84.339 - mean_q: -111.641 Interval 5406 (2702500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4874 9 episodes - episode_reward: -192.715 [-259.639, -122.497] - loss: 10.271 - mae: 84.322 - mean_q: -111.618 Interval 5407 (2703000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0521 9 episodes - episode_reward: -169.687 [-221.507, -100.000] - loss: 13.915 - mae: 84.321 - mean_q: -111.597 Interval 5408 (2703500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4296 7 episodes - episode_reward: -173.278 [-242.915, -133.869] - loss: 12.338 - mae: 84.312 - mean_q: -111.577 Interval 5409 (2704000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6477 7 episodes - episode_reward: -186.371 [-240.447, -120.946] - loss: 16.901 - mae: 84.305 - mean_q: -111.539 Interval 5410 (2704500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1795 9 episodes - episode_reward: -181.356 [-242.353, -139.827] - loss: 13.127 - mae: 84.267 - mean_q: -111.499 Interval 5411 (2705000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8643 8 episodes - episode_reward: -172.025 [-242.282, -109.124] - loss: 17.218 - mae: 84.252 - mean_q: -111.452 Interval 5412 (2705500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8072 7 episodes - episode_reward: -198.222 [-240.610, -165.404] - loss: 10.794 - mae: 84.197 - mean_q: -111.405 Interval 5413 (2706000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7261 7 episodes - episode_reward: -183.835 [-272.821, -106.077] - loss: 12.659 - mae: 84.181 - mean_q: -111.401 Interval 5414 (2706500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7673 8 episodes - episode_reward: -191.023 [-267.309, -140.166] - loss: 13.909 - mae: 84.177 - mean_q: -111.362 Interval 5415 (2707000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0631 7 episodes - episode_reward: -203.587 [-314.875, -100.000] - loss: 12.843 - mae: 84.132 - mean_q: -111.326 Interval 5416 (2707500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3582 8 episodes - episode_reward: -149.400 [-226.274, -6.496] - loss: 14.842 - mae: 84.121 - mean_q: -111.299 Interval 5417 (2708000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0632 7 episodes - episode_reward: -224.808 [-307.848, -121.244] - loss: 13.230 - mae: 84.086 - mean_q: -111.280 Interval 5418 (2708500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9980 9 episodes - episode_reward: -169.690 [-265.839, 13.759] - loss: 9.600 - mae: 84.059 - mean_q: -111.268 Interval 5419 (2709000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2463 8 episodes - episode_reward: -190.306 [-273.198, -107.206] - loss: 10.981 - mae: 84.057 - mean_q: -111.261 Interval 5420 (2709500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7268 8 episodes - episode_reward: -187.187 [-249.735, -143.278] - loss: 10.534 - mae: 84.031 - mean_q: -111.232 Interval 5421 (2710000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4877 7 episodes - episode_reward: -167.604 [-224.654, -115.998] - loss: 13.744 - mae: 84.006 - mean_q: -111.205 Interval 5422 (2710500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5548 10 episodes - episode_reward: -172.774 [-337.802, -17.631] - loss: 11.830 - mae: 83.955 - mean_q: -111.173 Interval 5423 (2711000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3942 8 episodes - episode_reward: -156.607 [-202.088, 5.192] - loss: 11.139 - mae: 83.902 - mean_q: -111.155 Interval 5424 (2711500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7066 5 episodes - episode_reward: -426.870 [-691.113, -204.200] - loss: 11.444 - mae: 83.862 - mean_q: -111.080 Interval 5425 (2712000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7884 3 episodes - episode_reward: -1048.335 [-1605.599, -561.029] - loss: 15.158 - mae: 83.842 - mean_q: -110.998 Interval 5426 (2712500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6717 7 episodes - episode_reward: -193.710 [-257.751, -149.623] - loss: 15.954 - mae: 83.851 - mean_q: -111.013 Interval 5427 (2713000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0616 8 episodes - episode_reward: -190.514 [-384.690, -116.067] - loss: 7.843 - mae: 83.810 - mean_q: -111.006 Interval 5428 (2713500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5324 8 episodes - episode_reward: -158.032 [-213.356, -110.122] - loss: 9.578 - mae: 83.814 - mean_q: -111.027 Interval 5429 (2714000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8457 7 episodes - episode_reward: -207.372 [-337.801, -128.520] - loss: 12.607 - mae: 83.824 - mean_q: -111.033 Interval 5430 (2714500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5659 7 episodes - episode_reward: -181.740 [-356.997, -80.397] - loss: 12.738 - mae: 83.830 - mean_q: -111.017 Interval 5431 (2715000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2310 6 episodes - episode_reward: -178.921 [-238.517, -101.503] - loss: 11.446 - mae: 83.834 - mean_q: -111.016 Interval 5432 (2715500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8104 10 episodes - episode_reward: -142.970 [-225.757, 37.358] - loss: 15.155 - mae: 83.848 - mean_q: -111.001 Interval 5433 (2716000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6620 9 episodes - episode_reward: -203.166 [-275.993, -100.000] - loss: 11.819 - mae: 83.823 - mean_q: -110.999 Interval 5434 (2716500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9221 8 episodes - episode_reward: -182.691 [-247.875, -119.083] - loss: 13.193 - mae: 83.829 - mean_q: -110.998 Interval 5435 (2717000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1537 8 episodes - episode_reward: -187.096 [-287.547, -126.497] - loss: 16.516 - mae: 83.845 - mean_q: -110.976 Interval 5436 (2717500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7424 10 episodes - episode_reward: -187.911 [-271.916, -160.198] - loss: 9.421 - mae: 83.812 - mean_q: -110.977 Interval 5437 (2718000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8578 8 episodes - episode_reward: -183.957 [-224.014, -155.313] - loss: 11.807 - mae: 83.828 - mean_q: -110.998 Interval 5438 (2718500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9382 8 episodes - episode_reward: -186.669 [-266.972, -113.749] - loss: 9.050 - mae: 83.824 - mean_q: -110.993 Interval 5439 (2719000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3291 8 episodes - episode_reward: -206.131 [-360.676, -162.777] - loss: 11.191 - mae: 83.850 - mean_q: -110.996 Interval 5440 (2719500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3796 6 episodes - episode_reward: -200.537 [-247.083, -156.666] - loss: 16.627 - mae: 83.871 - mean_q: -110.984 Interval 5441 (2720000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5042 6 episodes - episode_reward: -200.944 [-280.321, -137.021] - loss: 11.399 - mae: 83.846 - mean_q: -110.977 Interval 5442 (2720500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1303 7 episodes - episode_reward: -151.623 [-238.776, 16.388] - loss: 8.576 - mae: 83.835 - mean_q: -111.009 Interval 5443 (2721000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3012 7 episodes - episode_reward: -173.649 [-212.481, -131.356] - loss: 11.714 - mae: 83.850 - mean_q: -111.021 Interval 5444 (2721500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9181 7 episodes - episode_reward: -201.949 [-298.587, -112.856] - loss: 7.524 - mae: 83.828 - mean_q: -111.031 Interval 5445 (2722000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1956 8 episodes - episode_reward: -200.443 [-273.611, -138.594] - loss: 14.449 - mae: 83.856 - mean_q: -111.062 Interval 5446 (2722500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7637 7 episodes - episode_reward: -413.006 [-792.393, -199.467] - loss: 10.967 - mae: 83.843 - mean_q: -111.063 Interval 5447 (2723000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6159 10 episodes - episode_reward: -185.820 [-254.637, -100.000] - loss: 15.318 - mae: 83.895 - mean_q: -111.067 Interval 5448 (2723500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2218 8 episodes - episode_reward: -192.190 [-329.735, -100.000] - loss: 12.281 - mae: 83.901 - mean_q: -111.065 Interval 5449 (2724000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5291 9 episodes - episode_reward: -148.729 [-182.194, -116.312] - loss: 14.763 - mae: 83.929 - mean_q: -111.069 Interval 5450 (2724500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3987 10 episodes - episode_reward: -166.327 [-313.640, -74.567] - loss: 11.508 - mae: 83.942 - mean_q: -111.058 Interval 5451 (2725000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7871 8 episodes - episode_reward: -174.651 [-274.910, -53.470] - loss: 15.296 - mae: 83.984 - mean_q: -111.044 Interval 5452 (2725500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6213 7 episodes - episode_reward: -186.819 [-226.020, -152.925] - loss: 11.516 - mae: 83.974 - mean_q: -111.066 Interval 5453 (2726000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8144 7 episodes - episode_reward: -180.011 [-271.372, -108.787] - loss: 14.695 - mae: 84.005 - mean_q: -111.070 Interval 5454 (2726500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2532 9 episodes - episode_reward: -190.446 [-328.681, -140.493] - loss: 13.440 - mae: 84.013 - mean_q: -111.055 Interval 5455 (2727000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4958 7 episodes - episode_reward: -181.353 [-262.930, -149.215] - loss: 15.071 - mae: 84.027 - mean_q: -111.041 Interval 5456 (2727500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6809 8 episodes - episode_reward: -170.837 [-254.110, -91.727] - loss: 11.203 - mae: 84.023 - mean_q: -111.059 Interval 5457 (2728000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7526 8 episodes - episode_reward: -169.470 [-204.005, -143.393] - loss: 8.328 - mae: 84.024 - mean_q: -111.090 Interval 5458 (2728500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6884 10 episodes - episode_reward: -184.403 [-247.792, -100.000] - loss: 9.816 - mae: 84.045 - mean_q: -111.130 Interval 5459 (2729000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1150 10 episodes - episode_reward: -157.884 [-254.321, -8.981] - loss: 14.707 - mae: 84.091 - mean_q: -111.135 Interval 5460 (2729500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8938 9 episodes - episode_reward: -158.249 [-201.817, -50.570] - loss: 12.013 - mae: 84.089 - mean_q: -111.123 Interval 5461 (2730000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2839 8 episodes - episode_reward: -207.681 [-249.068, -177.881] - loss: 13.145 - mae: 84.096 - mean_q: -111.140 Interval 5462 (2730500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1214 9 episodes - episode_reward: -173.686 [-264.506, -35.762] - loss: 12.208 - mae: 84.087 - mean_q: -111.152 Interval 5463 (2731000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4644 9 episodes - episode_reward: -198.773 [-266.231, -100.000] - loss: 9.852 - mae: 84.101 - mean_q: -111.194 Interval 5464 (2731500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7900 6 episodes - episode_reward: -143.464 [-196.671, -44.476] - loss: 12.572 - mae: 84.109 - mean_q: -111.232 Interval 5465 (2732000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2388 9 episodes - episode_reward: -182.656 [-269.765, -113.158] - loss: 12.554 - mae: 84.108 - mean_q: -111.239 Interval 5466 (2732500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9504 7 episodes - episode_reward: -210.185 [-283.183, -118.033] - loss: 12.143 - mae: 84.122 - mean_q: -111.259 Interval 5467 (2733000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8592 7 episodes - episode_reward: -182.350 [-273.265, -114.031] - loss: 16.969 - mae: 84.134 - mean_q: -111.248 Interval 5468 (2733500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1759 10 episodes - episode_reward: -174.476 [-280.710, -100.000] - loss: 7.646 - mae: 84.112 - mean_q: -111.285 Interval 5469 (2734000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9187 7 episodes - episode_reward: -207.594 [-244.417, -134.044] - loss: 9.692 - mae: 84.153 - mean_q: -111.312 Interval 5470 (2734500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9819 7 episodes - episode_reward: -195.717 [-232.906, -126.565] - loss: 11.535 - mae: 84.171 - mean_q: -111.331 Interval 5471 (2735000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0257 8 episodes - episode_reward: -201.134 [-307.146, -150.650] - loss: 9.683 - mae: 84.188 - mean_q: -111.381 Interval 5472 (2735500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8522 7 episodes - episode_reward: -197.040 [-226.671, -151.129] - loss: 12.770 - mae: 84.231 - mean_q: -111.418 Interval 5473 (2736000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7399 8 episodes - episode_reward: -181.099 [-270.399, -124.865] - loss: 10.021 - mae: 84.228 - mean_q: -111.403 Interval 5474 (2736500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5444 7 episodes - episode_reward: -167.477 [-269.240, -28.209] - loss: 10.693 - mae: 84.252 - mean_q: -111.445 Interval 5475 (2737000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8107 8 episodes - episode_reward: -189.089 [-305.002, -120.556] - loss: 11.670 - mae: 84.292 - mean_q: -111.466 Interval 5476 (2737500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2784 9 episodes - episode_reward: -176.404 [-249.878, -93.016] - loss: 13.478 - mae: 84.329 - mean_q: -111.470 Interval 5477 (2738000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0214 8 episodes - episode_reward: -192.784 [-334.175, -136.019] - loss: 8.170 - mae: 84.300 - mean_q: -111.508 Interval 5478 (2738500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2837 9 episodes - episode_reward: -178.253 [-224.042, -137.526] - loss: 10.168 - mae: 84.349 - mean_q: -111.544 Interval 5479 (2739000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8853 8 episodes - episode_reward: -177.571 [-210.413, -81.090] - loss: 11.287 - mae: 84.391 - mean_q: -111.551 Interval 5480 (2739500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.9747 7 episodes - episode_reward: -139.646 [-193.692, -100.000] - loss: 10.209 - mae: 84.399 - mean_q: -111.559 Interval 5481 (2740000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5491 9 episodes - episode_reward: -204.174 [-334.475, -106.849] - loss: 13.398 - mae: 84.430 - mean_q: -111.601 Interval 5482 (2740500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5638 7 episodes - episode_reward: -179.100 [-228.255, -156.760] - loss: 9.545 - mae: 84.437 - mean_q: -111.629 Interval 5483 (2741000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6035 7 episodes - episode_reward: -185.520 [-316.587, -133.295] - loss: 8.978 - mae: 84.443 - mean_q: -111.690 Interval 5484 (2741500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2519 8 episodes - episode_reward: -138.605 [-261.461, -5.792] - loss: 11.802 - mae: 84.489 - mean_q: -111.729 Interval 5485 (2742000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2438 8 episodes - episode_reward: -211.125 [-334.655, -149.562] - loss: 10.411 - mae: 84.491 - mean_q: -111.753 Interval 5486 (2742500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7622 7 episodes - episode_reward: -183.297 [-240.958, -107.971] - loss: 9.695 - mae: 84.523 - mean_q: -111.793 Interval 5487 (2743000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8528 8 episodes - episode_reward: -188.643 [-300.071, -100.000] - loss: 12.766 - mae: 84.577 - mean_q: -111.833 Interval 5488 (2743500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1145 8 episodes - episode_reward: -185.280 [-263.299, -100.000] - loss: 13.677 - mae: 84.603 - mean_q: -111.839 Interval 5489 (2744000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6794 9 episodes - episode_reward: -157.307 [-250.780, 7.911] - loss: 12.017 - mae: 84.622 - mean_q: -111.824 Interval 5490 (2744500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9766 9 episodes - episode_reward: -161.946 [-268.988, -100.000] - loss: 9.396 - mae: 84.616 - mean_q: -111.827 Interval 5491 (2745000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9700 8 episodes - episode_reward: -188.473 [-265.259, -119.481] - loss: 11.185 - mae: 84.647 - mean_q: -111.828 Interval 5492 (2745500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6761 7 episodes - episode_reward: -181.762 [-263.405, -52.326] - loss: 12.482 - mae: 84.652 - mean_q: -111.850 Interval 5493 (2746000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9556 6 episodes - episode_reward: -231.066 [-360.722, -135.254] - loss: 7.736 - mae: 84.656 - mean_q: -111.871 Interval 5494 (2746500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5361 6 episodes - episode_reward: -230.343 [-357.410, -127.086] - loss: 10.151 - mae: 84.687 - mean_q: -111.906 Interval 5495 (2747000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6206 7 episodes - episode_reward: -175.229 [-265.758, -112.822] - loss: 12.279 - mae: 84.707 - mean_q: -111.927 Interval 5496 (2747500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9455 8 episodes - episode_reward: -185.574 [-261.550, -100.000] - loss: 8.483 - mae: 84.696 - mean_q: -111.956 Interval 5497 (2748000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7708 7 episodes - episode_reward: -216.922 [-350.297, -49.548] - loss: 10.272 - mae: 84.728 - mean_q: -111.979 Interval 5498 (2748500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.0296 6 episodes - episode_reward: -163.031 [-253.065, -94.895] - loss: 13.239 - mae: 84.750 - mean_q: -112.008 Interval 5499 (2749000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2822 9 episodes - episode_reward: -186.955 [-240.789, -125.124] - loss: 11.942 - mae: 84.760 - mean_q: -112.025 Interval 5500 (2749500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2183 6 episodes - episode_reward: -181.309 [-236.412, -82.742] - loss: 9.956 - mae: 84.747 - mean_q: -112.010 Interval 5501 (2750000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8177 8 episodes - episode_reward: -174.380 [-300.053, 22.503] - loss: 14.692 - mae: 84.774 - mean_q: -112.018 Interval 5502 (2750500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7626 7 episodes - episode_reward: -197.947 [-445.622, -57.123] - loss: 12.981 - mae: 84.779 - mean_q: -112.028 Interval 5503 (2751000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9378 8 episodes - episode_reward: -187.683 [-263.178, -120.274] - loss: 11.756 - mae: 84.779 - mean_q: -112.025 Interval 5504 (2751500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2654 7 episodes - episode_reward: -158.095 [-224.124, -113.810] - loss: 10.713 - mae: 84.815 - mean_q: -112.039 Interval 5505 (2752000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8387 9 episodes - episode_reward: -160.459 [-344.169, 16.392] - loss: 13.713 - mae: 84.840 - mean_q: -112.070 Interval 5506 (2752500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2498 5 episodes - episode_reward: -195.495 [-314.459, -157.623] - loss: 12.984 - mae: 84.850 - mean_q: -112.083 Interval 5507 (2753000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4106 9 episodes - episode_reward: -195.912 [-289.279, -145.215] - loss: 13.664 - mae: 84.877 - mean_q: -112.083 Interval 5508 (2753500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6529 8 episodes - episode_reward: -178.555 [-275.113, -122.863] - loss: 10.506 - mae: 84.886 - mean_q: -112.102 Interval 5509 (2754000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0464 7 episodes - episode_reward: -144.801 [-209.371, -14.815] - loss: 8.153 - mae: 84.874 - mean_q: -112.134 Interval 5510 (2754500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5490 7 episodes - episode_reward: -175.301 [-238.730, -120.247] - loss: 11.512 - mae: 84.897 - mean_q: -112.158 Interval 5511 (2755000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0570 8 episodes - episode_reward: -191.017 [-249.292, -146.065] - loss: 16.531 - mae: 84.915 - mean_q: -112.167 Interval 5512 (2755500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8550 7 episodes - episode_reward: -195.397 [-293.850, -147.923] - loss: 17.418 - mae: 84.907 - mean_q: -112.141 Interval 5513 (2756000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2129 8 episodes - episode_reward: -150.604 [-201.772, -100.000] - loss: 11.193 - mae: 84.898 - mean_q: -112.131 Interval 5514 (2756500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0572 10 episodes - episode_reward: -153.636 [-215.911, -100.000] - loss: 16.090 - mae: 84.929 - mean_q: -112.115 Interval 5515 (2757000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0347 8 episodes - episode_reward: -179.847 [-225.455, -135.107] - loss: 9.497 - mae: 84.915 - mean_q: -112.108 Interval 5516 (2757500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.3751 6 episodes - episode_reward: -205.627 [-318.506, -97.736] - loss: 9.545 - mae: 84.908 - mean_q: -112.149 Interval 5517 (2758000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2525 9 episodes - episode_reward: -181.512 [-307.551, -110.480] - loss: 15.659 - mae: 84.932 - mean_q: -112.162 Interval 5518 (2758500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9905 8 episodes - episode_reward: -186.340 [-244.027, -146.042] - loss: 13.660 - mae: 84.941 - mean_q: -112.144 Interval 5519 (2759000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9363 7 episodes - episode_reward: -204.761 [-282.131, -166.925] - loss: 14.200 - mae: 84.937 - mean_q: -112.135 Interval 5520 (2759500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2875 8 episodes - episode_reward: -213.552 [-267.041, -167.577] - loss: 11.986 - mae: 84.932 - mean_q: -112.134 Interval 5521 (2760000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1303 6 episodes - episode_reward: -180.600 [-248.236, -106.560] - loss: 12.157 - mae: 84.937 - mean_q: -112.135 Interval 5522 (2760500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6508 8 episodes - episode_reward: -163.154 [-242.213, -51.009] - loss: 10.988 - mae: 84.929 - mean_q: -112.151 Interval 5523 (2761000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5053 7 episodes - episode_reward: -171.674 [-217.357, -147.276] - loss: 9.823 - mae: 84.925 - mean_q: -112.191 Interval 5524 (2761500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4548 7 episodes - episode_reward: -179.781 [-229.707, -71.216] - loss: 11.087 - mae: 84.917 - mean_q: -112.213 Interval 5525 (2762000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0838 8 episodes - episode_reward: -193.680 [-301.458, -130.393] - loss: 11.403 - mae: 84.901 - mean_q: -112.252 Interval 5526 (2762500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7178 8 episodes - episode_reward: -174.746 [-235.768, -14.531] - loss: 12.737 - mae: 84.881 - mean_q: -112.252 Interval 5527 (2763000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4595 7 episodes - episode_reward: -158.349 [-213.413, -111.643] - loss: 12.192 - mae: 84.850 - mean_q: -112.273 Interval 5528 (2763500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0031 9 episodes - episode_reward: -163.812 [-245.477, -98.486] - loss: 11.443 - mae: 84.839 - mean_q: -112.278 Interval 5529 (2764000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9503 8 episodes - episode_reward: -254.733 [-414.533, -117.396] - loss: 9.474 - mae: 84.807 - mean_q: -112.274 Interval 5530 (2764500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.6584 4 episodes - episode_reward: -596.284 [-998.841, -392.804] - loss: 9.214 - mae: 84.805 - mean_q: -112.237 Interval 5531 (2765000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5681 7 episodes - episode_reward: -386.138 [-760.599, -100.000] - loss: 15.402 - mae: 84.848 - mean_q: -112.274 Interval 5532 (2765500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5376 8 episodes - episode_reward: -157.856 [-191.659, -103.269] - loss: 13.065 - mae: 84.861 - mean_q: -112.270 Interval 5533 (2766000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7366 8 episodes - episode_reward: -173.883 [-316.004, -113.262] - loss: 9.603 - mae: 84.873 - mean_q: -112.266 Interval 5534 (2766500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5957 7 episodes - episode_reward: -185.230 [-237.523, -133.838] - loss: 13.494 - mae: 84.919 - mean_q: -112.276 Interval 5535 (2767000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4434 6 episodes - episode_reward: -194.633 [-229.655, -153.474] - loss: 11.021 - mae: 84.926 - mean_q: -112.264 Interval 5536 (2767500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9358 9 episodes - episode_reward: -165.339 [-264.278, -20.795] - loss: 12.012 - mae: 84.930 - mean_q: -112.273 Interval 5537 (2768000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0877 7 episodes - episode_reward: -165.901 [-245.603, -25.032] - loss: 13.634 - mae: 84.958 - mean_q: -112.281 Interval 5538 (2768500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9760 7 episodes - episode_reward: -189.910 [-227.480, -151.256] - loss: 10.285 - mae: 84.957 - mean_q: -112.284 Interval 5539 (2769000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9670 9 episodes - episode_reward: -178.285 [-271.919, -108.690] - loss: 14.779 - mae: 84.994 - mean_q: -112.274 Interval 5540 (2769500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2343 8 episodes - episode_reward: -201.136 [-224.977, -178.058] - loss: 12.242 - mae: 84.991 - mean_q: -112.266 Interval 5541 (2770000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2721 9 episodes - episode_reward: -183.415 [-286.117, -100.000] - loss: 14.342 - mae: 85.008 - mean_q: -112.252 Interval 5542 (2770500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5162 6 episodes - episode_reward: -198.853 [-293.557, -134.569] - loss: 13.735 - mae: 85.013 - mean_q: -112.226 Interval 5543 (2771000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0317 9 episodes - episode_reward: -173.615 [-222.058, -100.000] - loss: 10.714 - mae: 85.021 - mean_q: -112.221 Interval 5544 (2771500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3415 9 episodes - episode_reward: -189.879 [-286.616, -115.786] - loss: 11.494 - mae: 85.026 - mean_q: -112.209 Interval 5545 (2772000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1725 7 episodes - episode_reward: -204.286 [-290.575, -160.402] - loss: 8.479 - mae: 84.990 - mean_q: -112.225 Interval 5546 (2772500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0152 9 episodes - episode_reward: -176.809 [-289.365, -100.000] - loss: 12.909 - mae: 84.986 - mean_q: -112.225 Interval 5547 (2773000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4921 7 episodes - episode_reward: -188.392 [-320.735, -126.547] - loss: 15.188 - mae: 84.973 - mean_q: -112.202 Interval 5548 (2773500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7769 8 episodes - episode_reward: -172.956 [-321.947, -69.941] - loss: 8.828 - mae: 84.932 - mean_q: -112.184 Interval 5549 (2774000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1682 7 episodes - episode_reward: -150.704 [-231.397, -18.738] - loss: 11.452 - mae: 84.938 - mean_q: -112.190 Interval 5550 (2774500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2405 7 episodes - episode_reward: -164.350 [-219.284, -146.255] - loss: 11.317 - mae: 84.905 - mean_q: -112.193 Interval 5551 (2775000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5038 6 episodes - episode_reward: -197.073 [-259.995, -150.816] - loss: 9.254 - mae: 84.898 - mean_q: -112.205 Interval 5552 (2775500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1696 8 episodes - episode_reward: -207.970 [-344.182, -100.000] - loss: 11.120 - mae: 84.902 - mean_q: -112.227 Interval 5553 (2776000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5817 8 episodes - episode_reward: -160.137 [-229.384, -104.076] - loss: 11.356 - mae: 84.890 - mean_q: -112.212 Interval 5554 (2776500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8704 8 episodes - episode_reward: -177.706 [-233.503, -93.400] - loss: 10.224 - mae: 84.882 - mean_q: -112.224 Interval 5555 (2777000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1532 8 episodes - episode_reward: -197.076 [-320.669, -100.000] - loss: 10.728 - mae: 84.864 - mean_q: -112.226 Interval 5556 (2777500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0561 6 episodes - episode_reward: -179.595 [-207.844, -143.315] - loss: 9.205 - mae: 84.856 - mean_q: -112.252 Interval 5557 (2778000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.2831 10 episodes - episode_reward: -353.940 [-1014.440, -100.000] - loss: 11.489 - mae: 84.869 - mean_q: -112.281 Interval 5558 (2778500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2252 8 episodes - episode_reward: -203.479 [-233.998, -178.285] - loss: 12.796 - mae: 84.909 - mean_q: -112.275 Interval 5559 (2779000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9660 7 episodes - episode_reward: -218.207 [-301.291, -126.875] - loss: 9.182 - mae: 84.924 - mean_q: -112.282 Interval 5560 (2779500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3408 6 episodes - episode_reward: -187.078 [-266.616, -144.296] - loss: 13.151 - mae: 84.971 - mean_q: -112.295 Interval 5561 (2780000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3707 7 episodes - episode_reward: -175.424 [-215.399, -133.469] - loss: 14.643 - mae: 84.999 - mean_q: -112.272 Interval 5562 (2780500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8947 6 episodes - episode_reward: -152.550 [-190.075, -41.339] - loss: 12.035 - mae: 85.008 - mean_q: -112.234 Interval 5563 (2781000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3798 7 episodes - episode_reward: -176.843 [-262.927, -145.585] - loss: 12.553 - mae: 84.999 - mean_q: -112.224 Interval 5564 (2781500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9023 8 episodes - episode_reward: -174.712 [-228.449, -135.570] - loss: 10.331 - mae: 84.992 - mean_q: -112.223 Interval 5565 (2782000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1229 9 episodes - episode_reward: -170.876 [-233.407, -100.000] - loss: 12.867 - mae: 85.017 - mean_q: -112.209 Interval 5566 (2782500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9336 7 episodes - episode_reward: -207.321 [-410.783, -100.000] - loss: 10.934 - mae: 85.024 - mean_q: -112.206 Interval 5567 (2783000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9800 8 episodes - episode_reward: -193.363 [-262.256, -135.074] - loss: 9.337 - mae: 85.028 - mean_q: -112.224 Interval 5568 (2783500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2412 9 episodes - episode_reward: -180.317 [-294.871, -100.000] - loss: 10.848 - mae: 85.029 - mean_q: -112.241 Interval 5569 (2784000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4916 6 episodes - episode_reward: -192.401 [-261.495, -156.539] - loss: 11.160 - mae: 85.051 - mean_q: -112.221 Interval 5570 (2784500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0160 7 episodes - episode_reward: -229.106 [-316.813, -161.025] - loss: 14.229 - mae: 85.070 - mean_q: -112.198 Interval 5571 (2785000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9810 6 episodes - episode_reward: -147.008 [-242.428, -57.556] - loss: 10.739 - mae: 85.043 - mean_q: -112.184 Interval 5572 (2785500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5015 10 episodes - episode_reward: -190.618 [-279.501, -100.000] - loss: 11.643 - mae: 85.046 - mean_q: -112.183 Interval 5573 (2786000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1236 6 episodes - episode_reward: -166.816 [-245.194, -73.508] - loss: 11.543 - mae: 85.068 - mean_q: -112.168 Interval 5574 (2786500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7527 8 episodes - episode_reward: -177.393 [-232.701, -128.067] - loss: 12.533 - mae: 85.053 - mean_q: -112.145 Interval 5575 (2787000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9386 6 episodes - episode_reward: -235.953 [-339.145, -182.235] - loss: 9.803 - mae: 85.044 - mean_q: -112.132 Interval 5576 (2787500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3764 7 episodes - episode_reward: -174.185 [-268.319, -109.357] - loss: 9.887 - mae: 85.040 - mean_q: -112.143 Interval 5577 (2788000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9184 8 episodes - episode_reward: -177.240 [-258.458, -98.079] - loss: 9.950 - mae: 85.050 - mean_q: -112.140 Interval 5578 (2788500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8644 8 episodes - episode_reward: -184.348 [-227.252, -133.406] - loss: 12.311 - mae: 85.058 - mean_q: -112.161 Interval 5579 (2789000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8292 8 episodes - episode_reward: -176.775 [-224.954, -147.560] - loss: 12.673 - mae: 85.023 - mean_q: -112.171 Interval 5580 (2789500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8211 8 episodes - episode_reward: -180.424 [-280.732, -140.156] - loss: 12.807 - mae: 85.019 - mean_q: -112.134 Interval 5581 (2790000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8238 7 episodes - episode_reward: -195.745 [-242.081, -117.595] - loss: 10.911 - mae: 85.021 - mean_q: -112.115 Interval 5582 (2790500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0046 7 episodes - episode_reward: -136.331 [-193.561, -4.300] - loss: 13.322 - mae: 85.031 - mean_q: -112.118 Interval 5583 (2791000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7892 7 episodes - episode_reward: -199.363 [-273.548, -157.226] - loss: 12.798 - mae: 85.046 - mean_q: -112.089 Interval 5584 (2791500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6820 8 episodes - episode_reward: -169.424 [-275.787, 27.641] - loss: 14.029 - mae: 85.027 - mean_q: -112.070 Interval 5585 (2792000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2749 8 episodes - episode_reward: -211.986 [-376.373, -144.470] - loss: 13.345 - mae: 85.021 - mean_q: -112.039 Interval 5586 (2792500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6825 6 episodes - episode_reward: -214.837 [-284.669, -118.331] - loss: 13.419 - mae: 85.008 - mean_q: -112.049 Interval 5587 (2793000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6330 8 episodes - episode_reward: -167.379 [-206.865, -136.516] - loss: 11.057 - mae: 85.002 - mean_q: -112.034 Interval 5588 (2793500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0160 7 episodes - episode_reward: -210.746 [-265.127, -129.506] - loss: 12.655 - mae: 84.990 - mean_q: -112.011 Interval 5589 (2794000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2319 7 episodes - episode_reward: -233.773 [-396.044, -103.512] - loss: 11.848 - mae: 85.003 - mean_q: -112.015 Interval 5590 (2794500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9765 7 episodes - episode_reward: -147.183 [-237.851, -48.410] - loss: 11.584 - mae: 84.996 - mean_q: -112.002 Interval 5591 (2795000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8219 7 episodes - episode_reward: -183.500 [-223.756, -127.439] - loss: 12.828 - mae: 84.981 - mean_q: -111.992 Interval 5592 (2795500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6266 7 episodes - episode_reward: -191.635 [-320.881, -100.000] - loss: 15.892 - mae: 84.975 - mean_q: -111.959 Interval 5593 (2796000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3864 10 episodes - episode_reward: -178.664 [-273.327, -93.920] - loss: 13.158 - mae: 84.978 - mean_q: -111.926 Interval 5594 (2796500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5055 7 episodes - episode_reward: -179.931 [-236.655, -117.186] - loss: 8.678 - mae: 84.948 - mean_q: -111.910 Interval 5595 (2797000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9965 8 episodes - episode_reward: -185.959 [-253.129, -100.000] - loss: 14.441 - mae: 84.971 - mean_q: -111.893 Interval 5596 (2797500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9493 5 episodes - episode_reward: -182.411 [-242.168, -95.886] - loss: 12.030 - mae: 84.933 - mean_q: -111.868 Interval 5597 (2798000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2442 7 episodes - episode_reward: -165.711 [-240.905, -115.967] - loss: 10.517 - mae: 84.914 - mean_q: -111.868 Interval 5598 (2798500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5872 6 episodes - episode_reward: -213.669 [-375.112, -79.938] - loss: 12.163 - mae: 84.926 - mean_q: -111.861 Interval 5599 (2799000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1898 8 episodes - episode_reward: -198.882 [-241.893, -100.000] - loss: 10.654 - mae: 84.902 - mean_q: -111.853 Interval 5600 (2799500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3635 8 episodes - episode_reward: -207.344 [-219.795, -174.984] - loss: 12.825 - mae: 84.893 - mean_q: -111.842 Interval 5601 (2800000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6301 7 episodes - episode_reward: -185.405 [-291.190, -137.450] - loss: 9.396 - mae: 84.860 - mean_q: -111.829 Interval 5602 (2800500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9052 7 episodes - episode_reward: -212.641 [-314.888, -144.370] - loss: 9.550 - mae: 84.868 - mean_q: -111.821 Interval 5603 (2801000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5827 11 episodes - episode_reward: -165.859 [-265.705, -100.000] - loss: 12.192 - mae: 84.850 - mean_q: -111.815 Interval 5604 (2801500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6927 6 episodes - episode_reward: -145.221 [-209.593, 11.427] - loss: 11.129 - mae: 84.837 - mean_q: -111.805 Interval 5605 (2802000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0316 8 episodes - episode_reward: -184.047 [-275.533, -100.000] - loss: 14.189 - mae: 84.842 - mean_q: -111.806 Interval 5606 (2802500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0868 9 episodes - episode_reward: -174.576 [-239.302, -100.000] - loss: 12.002 - mae: 84.825 - mean_q: -111.778 Interval 5607 (2803000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6753 8 episodes - episode_reward: -162.779 [-345.946, -32.568] - loss: 8.778 - mae: 84.785 - mean_q: -111.775 Interval 5608 (2803500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0826 8 episodes - episode_reward: -194.481 [-287.462, -100.000] - loss: 9.214 - mae: 84.807 - mean_q: -111.815 Interval 5609 (2804000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8682 9 episodes - episode_reward: -162.106 [-212.029, -100.000] - loss: 9.981 - mae: 84.812 - mean_q: -111.802 Interval 5610 (2804500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8418 9 episodes - episode_reward: -161.337 [-237.426, -100.000] - loss: 10.414 - mae: 84.802 - mean_q: -111.802 Interval 5611 (2805000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7104 6 episodes - episode_reward: -213.406 [-342.037, -84.222] - loss: 11.205 - mae: 84.796 - mean_q: -111.787 Interval 5612 (2805500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4956 6 episodes - episode_reward: -196.187 [-317.111, -144.286] - loss: 14.708 - mae: 84.809 - mean_q: -111.764 Interval 5613 (2806000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2446 8 episodes - episode_reward: -212.652 [-308.204, -146.146] - loss: 15.159 - mae: 84.818 - mean_q: -111.755 Interval 5614 (2806500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6997 7 episodes - episode_reward: -198.355 [-246.444, -160.771] - loss: 9.520 - mae: 84.801 - mean_q: -111.774 Interval 5615 (2807000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5408 8 episodes - episode_reward: -154.675 [-203.189, -94.492] - loss: 13.112 - mae: 84.827 - mean_q: -111.771 Interval 5616 (2807500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3631 8 episodes - episode_reward: -152.229 [-202.136, -63.090] - loss: 10.361 - mae: 84.818 - mean_q: -111.765 Interval 5617 (2808000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5040 7 episodes - episode_reward: -178.375 [-267.862, -124.260] - loss: 11.159 - mae: 84.826 - mean_q: -111.748 Interval 5618 (2808500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6260 7 episodes - episode_reward: -185.238 [-274.996, -44.715] - loss: 9.770 - mae: 84.794 - mean_q: -111.745 Interval 5619 (2809000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4276 7 episodes - episode_reward: -168.691 [-266.853, 38.751] - loss: 11.260 - mae: 84.808 - mean_q: -111.754 Interval 5620 (2809500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8685 7 episodes - episode_reward: -138.078 [-213.724, 19.726] - loss: 14.117 - mae: 84.802 - mean_q: -111.729 Interval 5621 (2810000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4376 7 episodes - episode_reward: -146.658 [-186.725, -100.000] - loss: 14.921 - mae: 84.790 - mean_q: -111.696 Interval 5622 (2810500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8191 8 episodes - episode_reward: -202.247 [-326.244, -144.265] - loss: 10.215 - mae: 84.776 - mean_q: -111.695 Interval 5623 (2811000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8117 11 episodes - episode_reward: -164.985 [-287.247, -100.000] - loss: 13.131 - mae: 84.790 - mean_q: -111.684 Interval 5624 (2811500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5992 8 episodes - episode_reward: -168.417 [-214.369, -122.028] - loss: 11.231 - mae: 84.775 - mean_q: -111.682 Interval 5625 (2812000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3657 9 episodes - episode_reward: -186.466 [-312.072, -100.000] - loss: 10.185 - mae: 84.748 - mean_q: -111.681 Interval 5626 (2812500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3455 9 episodes - episode_reward: -192.002 [-295.766, -124.975] - loss: 11.169 - mae: 84.736 - mean_q: -111.687 Interval 5627 (2813000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3997 7 episodes - episode_reward: -169.370 [-222.202, -100.000] - loss: 12.863 - mae: 84.753 - mean_q: -111.689 Interval 5628 (2813500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6477 9 episodes - episode_reward: -196.111 [-431.870, -119.213] - loss: 10.853 - mae: 84.743 - mean_q: -111.668 Interval 5629 (2814000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4438 8 episodes - episode_reward: -150.260 [-329.693, 30.025] - loss: 10.770 - mae: 84.722 - mean_q: -111.665 Interval 5630 (2814500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7123 8 episodes - episode_reward: -108.560 [-218.785, -17.440] - loss: 13.272 - mae: 84.690 - mean_q: -111.657 Interval 5631 (2815000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1363 9 episodes - episode_reward: -175.913 [-326.950, -100.000] - loss: 11.942 - mae: 84.637 - mean_q: -111.638 Interval 5632 (2815500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3361 8 episodes - episode_reward: -207.683 [-271.032, -140.375] - loss: 13.847 - mae: 84.595 - mean_q: -111.624 Interval 5633 (2816000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7206 7 episodes - episode_reward: -193.082 [-237.887, -157.955] - loss: 11.826 - mae: 84.557 - mean_q: -111.624 Interval 5634 (2816500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2554 9 episodes - episode_reward: -189.399 [-223.997, -166.999] - loss: 12.051 - mae: 84.509 - mean_q: -111.614 Interval 5635 (2817000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5744 8 episodes - episode_reward: -222.518 [-332.499, -158.386] - loss: 13.378 - mae: 84.479 - mean_q: -111.604 Interval 5636 (2817500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2080 3 episodes - episode_reward: -617.388 [-740.158, -476.923] - loss: 10.345 - mae: 84.439 - mean_q: -111.559 Interval 5637 (2818000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.2409 4 episodes - episode_reward: -821.229 [-1454.094, -567.833] - loss: 13.290 - mae: 84.440 - mean_q: -111.549 Interval 5638 (2818500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4126 9 episodes - episode_reward: -194.521 [-323.087, -100.000] - loss: 11.332 - mae: 84.451 - mean_q: -111.597 Interval 5639 (2819000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8969 7 episodes - episode_reward: -214.152 [-256.093, -144.332] - loss: 9.494 - mae: 84.461 - mean_q: -111.611 Interval 5640 (2819500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8416 8 episodes - episode_reward: -178.071 [-266.197, -110.966] - loss: 10.460 - mae: 84.495 - mean_q: -111.642 Interval 5641 (2820000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4117 7 episodes - episode_reward: -165.442 [-285.319, -100.000] - loss: 11.106 - mae: 84.531 - mean_q: -111.668 Interval 5642 (2820500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6808 10 episodes - episode_reward: -138.439 [-184.559, 26.913] - loss: 10.243 - mae: 84.561 - mean_q: -111.680 Interval 5643 (2821000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5984 7 episodes - episode_reward: -179.632 [-229.959, -123.603] - loss: 13.101 - mae: 84.582 - mean_q: -111.678 Interval 5644 (2821500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0635 8 episodes - episode_reward: -196.083 [-242.411, -138.277] - loss: 11.866 - mae: 84.607 - mean_q: -111.663 Interval 5645 (2822000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2247 7 episodes - episode_reward: -229.837 [-421.392, -157.907] - loss: 11.060 - mae: 84.607 - mean_q: -111.666 Interval 5646 (2822500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1600 7 episodes - episode_reward: -146.052 [-266.926, 45.510] - loss: 14.770 - mae: 84.637 - mean_q: -111.661 Interval 5647 (2823000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5374 7 episodes - episode_reward: -188.690 [-258.968, -149.417] - loss: 13.567 - mae: 84.651 - mean_q: -111.638 Interval 5648 (2823500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1368 8 episodes - episode_reward: -187.110 [-390.378, -100.000] - loss: 9.918 - mae: 84.632 - mean_q: -111.631 Interval 5649 (2824000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2585 7 episodes - episode_reward: -167.169 [-212.545, -104.553] - loss: 12.382 - mae: 84.670 - mean_q: -111.659 Interval 5650 (2824500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0970 9 episodes - episode_reward: -171.513 [-255.377, -100.000] - loss: 12.470 - mae: 84.681 - mean_q: -111.642 Interval 5651 (2825000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0722 8 episodes - episode_reward: -193.418 [-365.711, -95.442] - loss: 16.888 - mae: 84.722 - mean_q: -111.620 Interval 5652 (2825500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6021 7 episodes - episode_reward: -184.561 [-243.384, -132.976] - loss: 9.354 - mae: 84.695 - mean_q: -111.615 Interval 5653 (2826000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0960 7 episodes - episode_reward: -216.946 [-421.404, -100.000] - loss: 12.736 - mae: 84.716 - mean_q: -111.643 Interval 5654 (2826500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6266 7 episodes - episode_reward: -191.455 [-235.887, -131.865] - loss: 11.878 - mae: 84.720 - mean_q: -111.638 Interval 5655 (2827000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8288 7 episodes - episode_reward: -205.350 [-264.731, -116.758] - loss: 13.506 - mae: 84.729 - mean_q: -111.656 Interval 5656 (2827500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8312 8 episodes - episode_reward: -167.169 [-223.788, -108.760] - loss: 15.143 - mae: 84.710 - mean_q: -111.634 Interval 5657 (2828000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5202 7 episodes - episode_reward: -190.441 [-309.975, -42.741] - loss: 15.397 - mae: 84.686 - mean_q: -111.597 Interval 5658 (2828500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3423 7 episodes - episode_reward: -164.757 [-269.940, -110.430] - loss: 13.360 - mae: 84.657 - mean_q: -111.556 Interval 5659 (2829000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8115 8 episodes - episode_reward: -181.502 [-273.256, -140.207] - loss: 11.227 - mae: 84.606 - mean_q: -111.551 Interval 5660 (2829500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4838 6 episodes - episode_reward: -180.976 [-214.385, -117.102] - loss: 9.653 - mae: 84.574 - mean_q: -111.562 Interval 5661 (2830000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2909 8 episodes - episode_reward: -216.534 [-267.372, -133.287] - loss: 15.251 - mae: 84.571 - mean_q: -111.541 Interval 5662 (2830500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4657 7 episodes - episode_reward: -175.832 [-285.115, -78.994] - loss: 11.522 - mae: 84.537 - mean_q: -111.539 Interval 5663 (2831000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4667 8 episodes - episode_reward: -164.251 [-211.906, -123.574] - loss: 9.883 - mae: 84.496 - mean_q: -111.565 Interval 5664 (2831500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2292 6 episodes - episode_reward: -182.032 [-348.885, -106.960] - loss: 14.355 - mae: 84.501 - mean_q: -111.568 Interval 5665 (2832000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6848 7 episodes - episode_reward: -185.432 [-272.830, -128.835] - loss: 12.894 - mae: 84.505 - mean_q: -111.560 Interval 5666 (2832500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6026 7 episodes - episode_reward: -190.649 [-284.733, -132.575] - loss: 10.445 - mae: 84.463 - mean_q: -111.565 Interval 5667 (2833000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5228 7 episodes - episode_reward: -176.295 [-223.269, -106.708] - loss: 12.652 - mae: 84.465 - mean_q: -111.575 Interval 5668 (2833500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6421 10 episodes - episode_reward: -184.807 [-317.076, -127.841] - loss: 11.872 - mae: 84.450 - mean_q: -111.582 Interval 5669 (2834000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4075 9 episodes - episode_reward: -186.314 [-295.384, -113.216] - loss: 11.258 - mae: 84.447 - mean_q: -111.582 Interval 5670 (2834500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7271 8 episodes - episode_reward: -168.563 [-248.712, -120.803] - loss: 10.432 - mae: 84.444 - mean_q: -111.588 Interval 5671 (2835000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0255 6 episodes - episode_reward: -160.008 [-284.270, -42.427] - loss: 14.857 - mae: 84.444 - mean_q: -111.597 Interval 5672 (2835500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0071 8 episodes - episode_reward: -135.202 [-250.592, -18.119] - loss: 13.509 - mae: 84.425 - mean_q: -111.579 Interval 5673 (2836000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2742 6 episodes - episode_reward: -176.908 [-242.668, -142.582] - loss: 13.451 - mae: 84.405 - mean_q: -111.588 Interval 5674 (2836500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0582 8 episodes - episode_reward: -143.647 [-254.923, 31.900] - loss: 14.135 - mae: 84.418 - mean_q: -111.567 Interval 5675 (2837000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9792 8 episodes - episode_reward: -184.909 [-278.825, -93.925] - loss: 12.637 - mae: 84.398 - mean_q: -111.540 Interval 5676 (2837500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1116 7 episodes - episode_reward: -151.160 [-306.032, 16.264] - loss: 14.231 - mae: 84.401 - mean_q: -111.510 Interval 5677 (2838000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0968 9 episodes - episode_reward: -172.950 [-237.630, -110.365] - loss: 12.978 - mae: 84.373 - mean_q: -111.476 Interval 5678 (2838500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1894 10 episodes - episode_reward: -155.114 [-201.693, -130.009] - loss: 12.726 - mae: 84.348 - mean_q: -111.454 Interval 5679 (2839000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8860 9 episodes - episode_reward: -165.174 [-224.610, -94.146] - loss: 13.317 - mae: 84.336 - mean_q: -111.460 Interval 5680 (2839500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0679 8 episodes - episode_reward: -187.502 [-280.077, -100.000] - loss: 14.539 - mae: 84.309 - mean_q: -111.440 Interval 5681 (2840000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5878 7 episodes - episode_reward: -189.160 [-293.371, -27.924] - loss: 12.666 - mae: 84.288 - mean_q: -111.440 Interval 5682 (2840500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2695 8 episodes - episode_reward: -205.063 [-283.557, -126.374] - loss: 12.641 - mae: 84.299 - mean_q: -111.438 Interval 5683 (2841000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0893 6 episodes - episode_reward: -168.912 [-251.639, -67.621] - loss: 10.761 - mae: 84.287 - mean_q: -111.415 Interval 5684 (2841500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9958 8 episodes - episode_reward: -185.326 [-315.352, -119.862] - loss: 13.608 - mae: 84.285 - mean_q: -111.404 Interval 5685 (2842000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8447 8 episodes - episode_reward: -182.073 [-231.997, -142.054] - loss: 13.886 - mae: 84.287 - mean_q: -111.397 Interval 5686 (2842500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9840 7 episodes - episode_reward: -211.050 [-369.530, -149.109] - loss: 12.916 - mae: 84.281 - mean_q: -111.387 Interval 5687 (2843000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3139 7 episodes - episode_reward: -152.317 [-229.813, -84.748] - loss: 13.790 - mae: 84.282 - mean_q: -111.358 Interval 5688 (2843500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8755 7 episodes - episode_reward: -199.380 [-239.950, -139.592] - loss: 16.471 - mae: 84.278 - mean_q: -111.326 Interval 5689 (2844000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8238 9 episodes - episode_reward: -174.259 [-266.339, -1.632] - loss: 12.258 - mae: 84.245 - mean_q: -111.306 Interval 5690 (2844500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9428 9 episodes - episode_reward: -154.268 [-225.366, -66.225] - loss: 13.202 - mae: 84.249 - mean_q: -111.308 Interval 5691 (2845000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5169 9 episodes - episode_reward: -200.003 [-248.058, -162.570] - loss: 13.520 - mae: 84.226 - mean_q: -111.284 Interval 5692 (2845500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0668 7 episodes - episode_reward: -220.234 [-482.621, -100.000] - loss: 12.566 - mae: 84.213 - mean_q: -111.283 Interval 5693 (2846000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9943 8 episodes - episode_reward: -184.300 [-265.337, -125.066] - loss: 15.228 - mae: 84.212 - mean_q: -111.273 Interval 5694 (2846500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6783 7 episodes - episode_reward: -198.375 [-266.947, -27.435] - loss: 11.737 - mae: 84.192 - mean_q: -111.247 Interval 5695 (2847000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5929 8 episodes - episode_reward: -154.991 [-221.972, -68.303] - loss: 9.913 - mae: 84.185 - mean_q: -111.275 Interval 5696 (2847500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6238 7 episodes - episode_reward: -182.410 [-231.128, -159.908] - loss: 12.721 - mae: 84.183 - mean_q: -111.261 Interval 5697 (2848000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8928 8 episodes - episode_reward: -182.813 [-243.783, -135.445] - loss: 10.535 - mae: 84.171 - mean_q: -111.269 Interval 5698 (2848500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9802 8 episodes - episode_reward: -197.169 [-254.768, -151.268] - loss: 15.593 - mae: 84.168 - mean_q: -111.265 Interval 5699 (2849000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6880 8 episodes - episode_reward: -160.923 [-206.569, -100.000] - loss: 12.901 - mae: 84.158 - mean_q: -111.237 Interval 5700 (2849500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4925 8 episodes - episode_reward: -152.516 [-228.501, -88.009] - loss: 10.595 - mae: 84.140 - mean_q: -111.217 Interval 5701 (2850000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5557 8 episodes - episode_reward: -171.264 [-251.900, -112.803] - loss: 13.208 - mae: 84.147 - mean_q: -111.201 Interval 5702 (2850500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5926 7 episodes - episode_reward: -180.619 [-328.727, -116.860] - loss: 11.141 - mae: 84.119 - mean_q: -111.181 Interval 5703 (2851000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5732 8 episodes - episode_reward: -169.727 [-232.087, -125.068] - loss: 12.685 - mae: 84.116 - mean_q: -111.175 Interval 5704 (2851500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5426 8 episodes - episode_reward: -150.621 [-253.348, 3.353] - loss: 16.356 - mae: 84.126 - mean_q: -111.145 Interval 5705 (2852000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2022 6 episodes - episode_reward: -179.130 [-249.622, -130.980] - loss: 11.698 - mae: 84.087 - mean_q: -111.117 Interval 5706 (2852500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7934 9 episodes - episode_reward: -154.253 [-200.394, -100.000] - loss: 14.161 - mae: 84.068 - mean_q: -111.103 Interval 5707 (2853000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7310 9 episodes - episode_reward: -143.403 [-189.661, -100.000] - loss: 12.661 - mae: 84.052 - mean_q: -111.092 Interval 5708 (2853500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7900 8 episodes - episode_reward: -190.763 [-306.391, -101.786] - loss: 12.721 - mae: 84.029 - mean_q: -111.073 Interval 5709 (2854000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5910 9 episodes - episode_reward: -143.668 [-209.236, -9.314] - loss: 13.944 - mae: 84.027 - mean_q: -111.022 Interval 5710 (2854500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7930 8 episodes - episode_reward: -168.776 [-275.205, -109.333] - loss: 9.449 - mae: 83.990 - mean_q: -111.018 Interval 5711 (2855000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7953 7 episodes - episode_reward: -193.423 [-237.330, -159.973] - loss: 12.305 - mae: 83.989 - mean_q: -110.996 Interval 5712 (2855500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3263 9 episodes - episode_reward: -190.520 [-409.460, -44.371] - loss: 10.425 - mae: 83.956 - mean_q: -110.983 Interval 5713 (2856000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9227 9 episodes - episode_reward: -161.933 [-229.435, -100.000] - loss: 16.029 - mae: 83.964 - mean_q: -110.946 Interval 5714 (2856500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0091 7 episodes - episode_reward: -215.127 [-293.741, -167.558] - loss: 8.333 - mae: 83.923 - mean_q: -110.938 Interval 5715 (2857000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0283 8 episodes - episode_reward: -186.142 [-249.883, -100.000] - loss: 10.243 - mae: 83.923 - mean_q: -110.959 Interval 5716 (2857500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9078 8 episodes - episode_reward: -188.367 [-250.680, -95.400] - loss: 14.920 - mae: 83.930 - mean_q: -110.903 Interval 5717 (2858000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7532 8 episodes - episode_reward: -233.934 [-445.630, -130.949] - loss: 13.388 - mae: 83.902 - mean_q: -110.857 Interval 5718 (2858500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3534 8 episodes - episode_reward: -203.379 [-261.204, -145.570] - loss: 10.637 - mae: 83.881 - mean_q: -110.854 Interval 5719 (2859000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8736 9 episodes - episode_reward: -168.005 [-258.027, -106.580] - loss: 10.354 - mae: 83.864 - mean_q: -110.867 Interval 5720 (2859500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0132 8 episodes - episode_reward: -184.444 [-311.927, -100.000] - loss: 13.615 - mae: 83.872 - mean_q: -110.853 Interval 5721 (2860000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5711 6 episodes - episode_reward: -204.296 [-300.399, -157.109] - loss: 12.521 - mae: 83.854 - mean_q: -110.806 Interval 5722 (2860500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7214 10 episodes - episode_reward: -187.926 [-375.352, -120.511] - loss: 11.418 - mae: 83.832 - mean_q: -110.824 Interval 5723 (2861000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5919 9 episodes - episode_reward: -161.469 [-197.130, -126.599] - loss: 9.831 - mae: 83.818 - mean_q: -110.837 Interval 5724 (2861500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0246 8 episodes - episode_reward: -235.401 [-308.769, -172.701] - loss: 11.054 - mae: 83.816 - mean_q: -110.853 Interval 5725 (2862000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1214 8 episodes - episode_reward: -198.118 [-242.635, -141.219] - loss: 12.049 - mae: 83.805 - mean_q: -110.864 Interval 5726 (2862500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4593 7 episodes - episode_reward: -168.119 [-250.306, 80.504] - loss: 11.279 - mae: 83.825 - mean_q: -110.875 Interval 5727 (2863000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2263 9 episodes - episode_reward: -181.380 [-220.863, -137.558] - loss: 12.300 - mae: 83.812 - mean_q: -110.900 Interval 5728 (2863500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4522 9 episodes - episode_reward: -183.471 [-223.833, -147.652] - loss: 10.778 - mae: 83.808 - mean_q: -110.901 Interval 5729 (2864000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1474 9 episodes - episode_reward: -177.970 [-259.928, -100.000] - loss: 11.076 - mae: 83.810 - mean_q: -110.902 Interval 5730 (2864500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7386 9 episodes - episode_reward: -159.158 [-233.866, -115.038] - loss: 13.223 - mae: 83.840 - mean_q: -110.908 Interval 5731 (2865000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5936 7 episodes - episode_reward: -175.475 [-228.936, -99.100] - loss: 13.937 - mae: 83.856 - mean_q: -110.907 Interval 5732 (2865500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1334 7 episodes - episode_reward: -219.058 [-280.101, -148.940] - loss: 9.599 - mae: 83.833 - mean_q: -110.923 Interval 5733 (2866000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0429 7 episodes - episode_reward: -224.087 [-414.724, -113.102] - loss: 11.374 - mae: 83.832 - mean_q: -110.948 Interval 5734 (2866500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7638 7 episodes - episode_reward: -208.694 [-253.004, -178.083] - loss: 12.782 - mae: 83.845 - mean_q: -110.942 Interval 5735 (2867000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5933 8 episodes - episode_reward: -158.979 [-207.588, -113.768] - loss: 13.584 - mae: 83.841 - mean_q: -110.933 Interval 5736 (2867500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7039 9 episodes - episode_reward: -141.082 [-246.515, 30.083] - loss: 11.879 - mae: 83.819 - mean_q: -110.954 Interval 5737 (2868000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1601 10 episodes - episode_reward: -160.529 [-237.484, -77.053] - loss: 9.458 - mae: 83.769 - mean_q: -110.967 Interval 5738 (2868500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9670 10 episodes - episode_reward: -155.668 [-204.463, -119.456] - loss: 10.354 - mae: 83.742 - mean_q: -110.999 Interval 5739 (2869000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2713 6 episodes - episode_reward: -266.396 [-478.828, -131.191] - loss: 10.047 - mae: 83.720 - mean_q: -111.028 Interval 5740 (2869500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2977 9 episodes - episode_reward: -183.415 [-239.257, -130.162] - loss: 9.814 - mae: 83.692 - mean_q: -111.063 Interval 5741 (2870000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -4.7313 6 episodes - episode_reward: -393.521 [-626.404, -120.933] - loss: 12.073 - mae: 83.705 - mean_q: -111.025 Interval 5742 (2870500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.6934 6 episodes - episode_reward: -468.398 [-860.608, -100.000] - loss: 12.409 - mae: 83.708 - mean_q: -110.994 Interval 5743 (2871000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3124 8 episodes - episode_reward: -217.342 [-412.582, -100.000] - loss: 11.860 - mae: 83.743 - mean_q: -111.029 Interval 5744 (2871500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2055 8 episodes - episode_reward: -196.229 [-308.355, -115.264] - loss: 15.315 - mae: 83.789 - mean_q: -111.008 Interval 5745 (2872000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9379 8 episodes - episode_reward: -186.386 [-243.419, -156.867] - loss: 11.939 - mae: 83.776 - mean_q: -110.999 Interval 5746 (2872500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4336 8 episodes - episode_reward: -208.122 [-238.010, -145.473] - loss: 15.228 - mae: 83.800 - mean_q: -110.979 Interval 5747 (2873000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6562 9 episodes - episode_reward: -153.274 [-236.010, -110.048] - loss: 13.971 - mae: 83.808 - mean_q: -110.979 Interval 5748 (2873500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8152 6 episodes - episode_reward: -220.108 [-347.226, -133.296] - loss: 11.460 - mae: 83.797 - mean_q: -110.985 Interval 5749 (2874000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2536 8 episodes - episode_reward: -214.440 [-355.252, -127.927] - loss: 11.328 - mae: 83.807 - mean_q: -111.006 Interval 5750 (2874500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4502 7 episodes - episode_reward: -179.123 [-284.856, 19.526] - loss: 12.414 - mae: 83.833 - mean_q: -111.014 Interval 5751 (2875000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8769 7 episodes - episode_reward: -196.186 [-232.096, -96.236] - loss: 12.137 - mae: 83.848 - mean_q: -111.023 Interval 5752 (2875500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6259 8 episodes - episode_reward: -167.161 [-251.020, -77.826] - loss: 13.021 - mae: 83.871 - mean_q: -111.014 Interval 5753 (2876000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6290 7 episodes - episode_reward: -194.116 [-255.498, -121.902] - loss: 11.560 - mae: 83.883 - mean_q: -111.015 Interval 5754 (2876500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9565 8 episodes - episode_reward: -180.113 [-254.521, -70.628] - loss: 13.916 - mae: 83.893 - mean_q: -111.019 Interval 5755 (2877000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6335 7 episodes - episode_reward: -187.386 [-241.503, -156.606] - loss: 11.620 - mae: 83.884 - mean_q: -111.025 Interval 5756 (2877500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6320 5 episodes - episode_reward: -176.458 [-266.559, 25.970] - loss: 10.557 - mae: 83.877 - mean_q: -111.033 Interval 5757 (2878000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1768 8 episodes - episode_reward: -181.676 [-296.847, -38.263] - loss: 12.971 - mae: 83.883 - mean_q: -111.019 Interval 5758 (2878500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6619 6 episodes - episode_reward: -230.929 [-377.771, -118.747] - loss: 10.125 - mae: 83.885 - mean_q: -111.019 Interval 5759 (2879000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0786 8 episodes - episode_reward: -195.106 [-266.998, -122.473] - loss: 14.094 - mae: 83.906 - mean_q: -111.030 Interval 5760 (2879500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3512 9 episodes - episode_reward: -187.246 [-303.009, -126.835] - loss: 12.100 - mae: 83.899 - mean_q: -111.022 Interval 5761 (2880000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6437 7 episodes - episode_reward: -184.065 [-257.092, -124.112] - loss: 12.960 - mae: 83.897 - mean_q: -111.033 Interval 5762 (2880500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5917 8 episodes - episode_reward: -163.382 [-214.627, -92.885] - loss: 11.494 - mae: 83.895 - mean_q: -111.055 Interval 5763 (2881000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3052 8 episodes - episode_reward: -143.309 [-205.679, -62.264] - loss: 11.150 - mae: 83.910 - mean_q: -111.060 Interval 5764 (2881500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2994 9 episodes - episode_reward: -182.585 [-291.056, -100.000] - loss: 11.373 - mae: 83.913 - mean_q: -111.085 Interval 5765 (2882000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7579 10 episodes - episode_reward: -185.185 [-334.608, -56.509] - loss: 14.161 - mae: 83.929 - mean_q: -111.084 Interval 5766 (2882500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4194 8 episodes - episode_reward: -220.875 [-488.751, -129.368] - loss: 12.138 - mae: 83.944 - mean_q: -111.089 Interval 5767 (2883000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0520 8 episodes - episode_reward: -184.331 [-234.715, -127.872] - loss: 14.649 - mae: 83.963 - mean_q: -111.087 Interval 5768 (2883500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7546 7 episodes - episode_reward: -191.723 [-270.445, -111.802] - loss: 10.297 - mae: 83.953 - mean_q: -111.093 Interval 5769 (2884000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6676 7 episodes - episode_reward: -199.205 [-300.679, -126.905] - loss: 11.121 - mae: 83.955 - mean_q: -111.128 Interval 5770 (2884500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9966 8 episodes - episode_reward: -191.516 [-252.954, -154.597] - loss: 9.566 - mae: 83.957 - mean_q: -111.145 Interval 5771 (2885000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3543 6 episodes - episode_reward: -100.255 [-197.995, 37.742] - loss: 13.556 - mae: 83.978 - mean_q: -111.158 Interval 5772 (2885500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5599 8 episodes - episode_reward: -166.502 [-210.461, -132.784] - loss: 11.103 - mae: 83.978 - mean_q: -111.170 Interval 5773 (2886000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5713 8 episodes - episode_reward: -160.688 [-244.265, -0.327] - loss: 15.474 - mae: 84.003 - mean_q: -111.175 Interval 5774 (2886500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6146 6 episodes - episode_reward: -208.178 [-264.635, -150.422] - loss: 11.944 - mae: 83.988 - mean_q: -111.182 Interval 5775 (2887000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8577 7 episodes - episode_reward: -199.963 [-261.526, -157.114] - loss: 10.365 - mae: 83.985 - mean_q: -111.206 Interval 5776 (2887500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6558 7 episodes - episode_reward: -193.358 [-292.666, -25.081] - loss: 10.637 - mae: 83.989 - mean_q: -111.240 Interval 5777 (2888000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7896 8 episodes - episode_reward: -176.701 [-205.226, -152.525] - loss: 10.869 - mae: 84.000 - mean_q: -111.253 Interval 5778 (2888500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4917 7 episodes - episode_reward: -186.421 [-229.379, -156.865] - loss: 11.926 - mae: 84.035 - mean_q: -111.291 Interval 5779 (2889000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9089 7 episodes - episode_reward: -203.417 [-309.544, -132.027] - loss: 8.871 - mae: 84.013 - mean_q: -111.321 Interval 5780 (2889500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5151 6 episodes - episode_reward: -228.086 [-388.102, -148.352] - loss: 9.761 - mae: 84.033 - mean_q: -111.394 Interval 5781 (2890000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6354 8 episodes - episode_reward: -217.477 [-333.305, -21.380] - loss: 12.003 - mae: 84.080 - mean_q: -111.434 Interval 5782 (2890500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8829 8 episodes - episode_reward: -180.806 [-251.060, -152.539] - loss: 13.867 - mae: 84.094 - mean_q: -111.457 Interval 5783 (2891000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5977 6 episodes - episode_reward: -200.571 [-334.262, -7.688] - loss: 10.544 - mae: 84.097 - mean_q: -111.471 Interval 5784 (2891500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3926 6 episodes - episode_reward: -204.232 [-300.684, -102.603] - loss: 11.562 - mae: 84.141 - mean_q: -111.506 Interval 5785 (2892000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2661 7 episodes - episode_reward: -175.494 [-232.582, -108.806] - loss: 12.546 - mae: 84.175 - mean_q: -111.514 Interval 5786 (2892500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1804 6 episodes - episode_reward: -166.941 [-257.499, -54.404] - loss: 9.381 - mae: 84.184 - mean_q: -111.544 Interval 5787 (2893000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0624 9 episodes - episode_reward: -171.974 [-259.971, -74.615] - loss: 8.660 - mae: 84.220 - mean_q: -111.582 Interval 5788 (2893500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4640 7 episodes - episode_reward: -245.652 [-406.589, -184.071] - loss: 13.289 - mae: 84.279 - mean_q: -111.609 Interval 5789 (2894000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6555 7 episodes - episode_reward: -193.262 [-238.012, -168.913] - loss: 12.563 - mae: 84.295 - mean_q: -111.615 Interval 5790 (2894500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8709 7 episodes - episode_reward: -187.729 [-264.479, -145.290] - loss: 10.676 - mae: 84.298 - mean_q: -111.629 Interval 5791 (2895000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6647 9 episodes - episode_reward: -159.435 [-253.021, -5.659] - loss: 9.600 - mae: 84.320 - mean_q: -111.672 Interval 5792 (2895500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6086 7 episodes - episode_reward: -184.848 [-260.004, -131.229] - loss: 12.083 - mae: 84.340 - mean_q: -111.697 Interval 5793 (2896000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3941 7 episodes - episode_reward: -173.948 [-225.526, -115.804] - loss: 14.805 - mae: 84.357 - mean_q: -111.688 Interval 5794 (2896500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0924 8 episodes - episode_reward: -197.331 [-250.930, -147.253] - loss: 11.285 - mae: 84.369 - mean_q: -111.690 Interval 5795 (2897000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3661 6 episodes - episode_reward: -194.378 [-251.034, -108.737] - loss: 15.647 - mae: 84.393 - mean_q: -111.702 Interval 5796 (2897500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2376 6 episodes - episode_reward: -182.924 [-225.773, -85.275] - loss: 12.732 - mae: 84.399 - mean_q: -111.718 Interval 5797 (2898000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8014 8 episodes - episode_reward: -174.175 [-229.386, -134.479] - loss: 12.643 - mae: 84.403 - mean_q: -111.731 Interval 5798 (2898500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8450 8 episodes - episode_reward: -173.685 [-212.681, -137.153] - loss: 8.200 - mae: 84.419 - mean_q: -111.761 Interval 5799 (2899000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7873 8 episodes - episode_reward: -180.842 [-278.294, -149.054] - loss: 14.193 - mae: 84.456 - mean_q: -111.772 Interval 5800 (2899500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0403 6 episodes - episode_reward: -164.113 [-218.282, -95.842] - loss: 9.438 - mae: 84.437 - mean_q: -111.785 Interval 5801 (2900000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1907 7 episodes - episode_reward: -163.239 [-198.117, -109.184] - loss: 11.706 - mae: 84.467 - mean_q: -111.804 Interval 5802 (2900500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7103 7 episodes - episode_reward: -198.607 [-242.624, -148.064] - loss: 13.900 - mae: 84.492 - mean_q: -111.800 Interval 5803 (2901000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8312 8 episodes - episode_reward: -175.669 [-283.986, -100.000] - loss: 11.247 - mae: 84.496 - mean_q: -111.790 Interval 5804 (2901500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1465 9 episodes - episode_reward: -170.982 [-218.448, -109.212] - loss: 8.053 - mae: 84.492 - mean_q: -111.810 Interval 5805 (2902000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2530 7 episodes - episode_reward: -157.121 [-247.568, -24.804] - loss: 14.388 - mae: 84.529 - mean_q: -111.872 Interval 5806 (2902500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8741 8 episodes - episode_reward: -182.311 [-222.173, -134.241] - loss: 10.318 - mae: 84.544 - mean_q: -111.881 Interval 5807 (2903000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4981 8 episodes - episode_reward: -220.413 [-333.167, -136.947] - loss: 12.831 - mae: 84.571 - mean_q: -111.919 Interval 5808 (2903500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9750 8 episodes - episode_reward: -183.689 [-205.163, -155.151] - loss: 10.977 - mae: 84.570 - mean_q: -111.948 Interval 5809 (2904000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5884 8 episodes - episode_reward: -163.268 [-197.529, -131.804] - loss: 11.077 - mae: 84.587 - mean_q: -111.986 Interval 5810 (2904500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9654 8 episodes - episode_reward: -188.571 [-252.113, -124.789] - loss: 12.469 - mae: 84.618 - mean_q: -111.990 Interval 5811 (2905000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9789 7 episodes - episode_reward: -199.263 [-255.079, -157.561] - loss: 12.753 - mae: 84.616 - mean_q: -112.015 Interval 5812 (2905500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3959 10 episodes - episode_reward: -180.278 [-205.453, -100.000] - loss: 11.852 - mae: 84.620 - mean_q: -112.012 Interval 5813 (2906000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5562 8 episodes - episode_reward: -151.293 [-234.457, -97.218] - loss: 11.144 - mae: 84.636 - mean_q: -112.047 Interval 5814 (2906500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0146 7 episodes - episode_reward: -223.305 [-379.098, -109.133] - loss: 11.983 - mae: 84.654 - mean_q: -112.043 Interval 5815 (2907000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6252 6 episodes - episode_reward: -208.642 [-259.691, -164.200] - loss: 11.276 - mae: 84.662 - mean_q: -112.051 Interval 5816 (2907500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5935 8 episodes - episode_reward: -158.957 [-221.450, -22.108] - loss: 9.986 - mae: 84.652 - mean_q: -112.067 Interval 5817 (2908000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7940 8 episodes - episode_reward: -181.850 [-240.700, -144.296] - loss: 11.881 - mae: 84.662 - mean_q: -112.083 Interval 5818 (2908500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6858 8 episodes - episode_reward: -173.778 [-325.590, -116.774] - loss: 13.114 - mae: 84.678 - mean_q: -112.087 Interval 5819 (2909000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5035 7 episodes - episode_reward: -170.988 [-223.923, -53.131] - loss: 14.040 - mae: 84.700 - mean_q: -112.086 Interval 5820 (2909500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7785 7 episodes - episode_reward: -199.299 [-283.732, -123.060] - loss: 12.934 - mae: 84.714 - mean_q: -112.096 Interval 5821 (2910000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2870 6 episodes - episode_reward: -190.617 [-234.618, -146.197] - loss: 12.568 - mae: 84.713 - mean_q: -112.081 Interval 5822 (2910500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1328 8 episodes - episode_reward: -198.541 [-281.240, -100.000] - loss: 10.738 - mae: 84.728 - mean_q: -112.072 Interval 5823 (2911000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1062 6 episodes - episode_reward: -166.144 [-216.809, -126.803] - loss: 12.728 - mae: 84.744 - mean_q: -112.071 Interval 5824 (2911500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8140 10 episodes - episode_reward: -199.600 [-356.102, -100.000] - loss: 14.718 - mae: 84.759 - mean_q: -112.040 Interval 5825 (2912000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5291 6 episodes - episode_reward: -193.279 [-277.084, -121.873] - loss: 12.495 - mae: 84.737 - mean_q: -112.012 Interval 5826 (2912500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1646 9 episodes - episode_reward: -175.056 [-252.085, -104.099] - loss: 10.014 - mae: 84.715 - mean_q: -112.036 Interval 5827 (2913000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2045 9 episodes - episode_reward: -183.073 [-298.329, -100.000] - loss: 13.171 - mae: 84.739 - mean_q: -112.041 Interval 5828 (2913500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3949 8 episodes - episode_reward: -151.802 [-257.822, -22.055] - loss: 9.645 - mae: 84.712 - mean_q: -112.039 Interval 5829 (2914000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9592 7 episodes - episode_reward: -210.128 [-321.143, -118.797] - loss: 11.095 - mae: 84.714 - mean_q: -112.061 Interval 5830 (2914500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5701 7 episodes - episode_reward: -176.798 [-287.947, -127.233] - loss: 10.944 - mae: 84.723 - mean_q: -112.056 Interval 5831 (2915000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3614 8 episodes - episode_reward: -217.540 [-292.551, -117.326] - loss: 10.174 - mae: 84.719 - mean_q: -112.077 Interval 5832 (2915500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8041 9 episodes - episode_reward: -157.917 [-201.792, -121.801] - loss: 9.826 - mae: 84.717 - mean_q: -112.093 Interval 5833 (2916000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4663 6 episodes - episode_reward: -194.597 [-313.455, -92.595] - loss: 10.025 - mae: 84.738 - mean_q: -112.108 Interval 5834 (2916500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4468 7 episodes - episode_reward: -172.007 [-216.575, -124.007] - loss: 14.165 - mae: 84.751 - mean_q: -112.103 Interval 5835 (2917000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3090 7 episodes - episode_reward: -172.777 [-303.060, -36.612] - loss: 10.226 - mae: 84.716 - mean_q: -112.100 Interval 5836 (2917500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8750 6 episodes - episode_reward: -151.307 [-198.693, -90.601] - loss: 9.457 - mae: 84.727 - mean_q: -112.115 Interval 5837 (2918000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7486 7 episodes - episode_reward: -206.467 [-292.352, -161.907] - loss: 10.320 - mae: 84.722 - mean_q: -112.130 Interval 5838 (2918500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7169 8 episodes - episode_reward: -169.018 [-213.174, -100.000] - loss: 10.643 - mae: 84.724 - mean_q: -112.142 Interval 5839 (2919000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3238 7 episodes - episode_reward: -164.171 [-291.911, -40.920] - loss: 13.249 - mae: 84.745 - mean_q: -112.151 Interval 5840 (2919500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.5454 8 episodes - episode_reward: -161.456 [-223.120, -124.801] - loss: 10.704 - mae: 84.729 - mean_q: -112.163 Interval 5841 (2920000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4341 7 episodes - episode_reward: -171.127 [-220.461, -118.880] - loss: 12.248 - mae: 84.726 - mean_q: -112.135 Interval 5842 (2920500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7606 7 episodes - episode_reward: -175.947 [-264.199, -102.322] - loss: 7.436 - mae: 84.671 - mean_q: -112.141 Interval 5843 (2921000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2506 7 episodes - episode_reward: -231.201 [-385.116, -153.499] - loss: 8.780 - mae: 84.633 - mean_q: -112.175 Interval 5844 (2921500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7000 5 episodes - episode_reward: -587.779 [-1004.033, -408.857] - loss: 11.104 - mae: 84.622 - mean_q: -112.144 Interval 5845 (2922000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3921 8 episodes - episode_reward: -336.358 [-539.351, -100.000] - loss: 13.748 - mae: 84.621 - mean_q: -112.126 Interval 5846 (2922500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6349 8 episodes - episode_reward: -168.424 [-257.354, -20.693] - loss: 11.748 - mae: 84.629 - mean_q: -112.137 Interval 5847 (2923000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5416 7 episodes - episode_reward: -178.629 [-206.549, -139.039] - loss: 14.584 - mae: 84.662 - mean_q: -112.122 Interval 5848 (2923500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3316 8 episodes - episode_reward: -156.192 [-207.408, -18.228] - loss: 9.644 - mae: 84.640 - mean_q: -112.085 Interval 5849 (2924000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5025 6 episodes - episode_reward: -190.261 [-208.324, -169.733] - loss: 13.592 - mae: 84.641 - mean_q: -112.087 Interval 5850 (2924500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4087 8 episodes - episode_reward: -219.845 [-356.653, -156.226] - loss: 8.870 - mae: 84.632 - mean_q: -112.097 Interval 5851 (2925000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9304 8 episodes - episode_reward: -186.730 [-305.573, -146.118] - loss: 10.576 - mae: 84.629 - mean_q: -112.101 Interval 5852 (2925500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4418 7 episodes - episode_reward: -169.524 [-237.520, -119.413] - loss: 10.962 - mae: 84.631 - mean_q: -112.116 Interval 5853 (2926000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6355 8 episodes - episode_reward: -170.866 [-223.450, -138.785] - loss: 9.160 - mae: 84.632 - mean_q: -112.132 Interval 5854 (2926500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0838 8 episodes - episode_reward: -192.656 [-242.225, -151.488] - loss: 16.265 - mae: 84.663 - mean_q: -112.129 Interval 5855 (2927000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1674 8 episodes - episode_reward: -184.213 [-292.429, -114.697] - loss: 12.462 - mae: 84.636 - mean_q: -112.109 Interval 5856 (2927500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2097 6 episodes - episode_reward: -279.705 [-538.429, -170.633] - loss: 11.182 - mae: 84.626 - mean_q: -112.082 Interval 5857 (2928000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8438 9 episodes - episode_reward: -207.320 [-449.548, -123.789] - loss: 11.191 - mae: 84.626 - mean_q: -112.086 Interval 5858 (2928500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8228 7 episodes - episode_reward: -208.591 [-298.475, -168.563] - loss: 13.678 - mae: 84.634 - mean_q: -112.066 Interval 5859 (2929000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6947 6 episodes - episode_reward: -219.668 [-266.666, -163.423] - loss: 8.434 - mae: 84.612 - mean_q: -112.052 Interval 5860 (2929500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4406 7 episodes - episode_reward: -182.112 [-218.459, -133.140] - loss: 11.208 - mae: 84.612 - mean_q: -112.063 Interval 5861 (2930000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8110 7 episodes - episode_reward: -203.808 [-289.764, -125.278] - loss: 10.241 - mae: 84.605 - mean_q: -112.088 Interval 5862 (2930500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5029 10 episodes - episode_reward: -175.743 [-252.645, -123.454] - loss: 11.890 - mae: 84.632 - mean_q: -112.100 Interval 5863 (2931000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9886 8 episodes - episode_reward: -185.290 [-242.151, -142.234] - loss: 10.018 - mae: 84.612 - mean_q: -112.130 Interval 5864 (2931500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6556 6 episodes - episode_reward: -207.921 [-256.918, -156.293] - loss: 11.153 - mae: 84.640 - mean_q: -112.149 Interval 5865 (2932000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2740 9 episodes - episode_reward: -187.748 [-263.963, -151.516] - loss: 10.720 - mae: 84.640 - mean_q: -112.170 Interval 5866 (2932500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7199 7 episodes - episode_reward: -193.792 [-278.467, -134.228] - loss: 17.006 - mae: 84.672 - mean_q: -112.108 Interval 5867 (2933000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7740 8 episodes - episode_reward: -177.862 [-245.883, 19.182] - loss: 10.516 - mae: 84.639 - mean_q: -112.120 Interval 5868 (2933500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5129 7 episodes - episode_reward: -171.920 [-227.073, -132.871] - loss: 9.793 - mae: 84.638 - mean_q: -112.137 Interval 5869 (2934000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1871 7 episodes - episode_reward: -157.938 [-299.697, -100.015] - loss: 10.876 - mae: 84.638 - mean_q: -112.150 Interval 5870 (2934500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6570 6 episodes - episode_reward: -215.820 [-348.046, -81.645] - loss: 12.747 - mae: 84.662 - mean_q: -112.139 Interval 5871 (2935000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4933 8 episodes - episode_reward: -160.287 [-201.690, -115.693] - loss: 15.193 - mae: 84.656 - mean_q: -112.117 Interval 5872 (2935500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4853 7 episodes - episode_reward: -183.431 [-267.724, -137.654] - loss: 12.357 - mae: 84.654 - mean_q: -112.110 Interval 5873 (2936000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0334 7 episodes - episode_reward: -217.029 [-289.649, -165.927] - loss: 11.581 - mae: 84.663 - mean_q: -112.106 Interval 5874 (2936500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6079 6 episodes - episode_reward: -213.507 [-324.033, -150.152] - loss: 14.593 - mae: 84.677 - mean_q: -112.097 Interval 5875 (2937000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5490 7 episodes - episode_reward: -175.045 [-254.461, -139.194] - loss: 15.990 - mae: 84.691 - mean_q: -112.082 Interval 5876 (2937500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9841 8 episodes - episode_reward: -188.795 [-262.207, -121.429] - loss: 10.246 - mae: 84.664 - mean_q: -112.065 Interval 5877 (2938000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1575 7 episodes - episode_reward: -157.025 [-308.011, 96.132] - loss: 11.884 - mae: 84.670 - mean_q: -112.069 Interval 5878 (2938500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4108 7 episodes - episode_reward: -168.365 [-249.161, -132.457] - loss: 12.667 - mae: 84.665 - mean_q: -112.073 Interval 5879 (2939000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5375 6 episodes - episode_reward: -207.359 [-313.717, -146.332] - loss: 12.117 - mae: 84.652 - mean_q: -112.064 Interval 5880 (2939500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4714 10 episodes - episode_reward: -180.468 [-281.219, -100.000] - loss: 9.470 - mae: 84.638 - mean_q: -112.099 Interval 5881 (2940000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9404 8 episodes - episode_reward: -185.413 [-243.115, -128.193] - loss: 9.179 - mae: 84.658 - mean_q: -112.130 Interval 5882 (2940500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4923 7 episodes - episode_reward: -162.959 [-213.793, -116.745] - loss: 10.385 - mae: 84.663 - mean_q: -112.149 Interval 5883 (2941000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8707 9 episodes - episode_reward: -166.587 [-210.915, -125.831] - loss: 15.201 - mae: 84.678 - mean_q: -112.152 Interval 5884 (2941500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2718 8 episodes - episode_reward: -194.857 [-348.107, -100.000] - loss: 11.627 - mae: 84.636 - mean_q: -112.138 Interval 5885 (2942000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9867 9 episodes - episode_reward: -174.332 [-239.893, -119.890] - loss: 11.607 - mae: 84.625 - mean_q: -112.147 Interval 5886 (2942500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8588 7 episodes - episode_reward: -202.401 [-274.453, -167.536] - loss: 9.562 - mae: 84.628 - mean_q: -112.172 Interval 5887 (2943000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5228 8 episodes - episode_reward: -164.643 [-240.214, -100.000] - loss: 12.650 - mae: 84.653 - mean_q: -112.171 Interval 5888 (2943500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8848 8 episodes - episode_reward: -178.762 [-254.504, -139.365] - loss: 10.572 - mae: 84.643 - mean_q: -112.164 Interval 5889 (2944000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8055 7 episodes - episode_reward: -209.418 [-322.249, -110.742] - loss: 10.813 - mae: 84.650 - mean_q: -112.158 Interval 5890 (2944500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0439 8 episodes - episode_reward: -178.858 [-351.414, -100.000] - loss: 14.584 - mae: 84.680 - mean_q: -112.143 Interval 5891 (2945000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8747 8 episodes - episode_reward: -175.205 [-292.420, -110.995] - loss: 11.811 - mae: 84.686 - mean_q: -112.118 Interval 5892 (2945500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7445 8 episodes - episode_reward: -185.031 [-244.276, -125.135] - loss: 11.500 - mae: 84.671 - mean_q: -112.128 Interval 5893 (2946000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0957 9 episodes - episode_reward: -167.042 [-261.117, -107.621] - loss: 10.252 - mae: 84.656 - mean_q: -112.145 Interval 5894 (2946500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7035 6 episodes - episode_reward: -206.141 [-267.520, -135.789] - loss: 12.026 - mae: 84.665 - mean_q: -112.149 Interval 5895 (2947000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2518 7 episodes - episode_reward: -245.191 [-402.945, -163.625] - loss: 11.027 - mae: 84.665 - mean_q: -112.154 Interval 5896 (2947500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5313 8 episodes - episode_reward: -225.627 [-458.221, -122.287] - loss: 11.461 - mae: 84.692 - mean_q: -112.180 Interval 5897 (2948000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8526 7 episodes - episode_reward: -198.580 [-285.509, -127.239] - loss: 11.083 - mae: 84.689 - mean_q: -112.176 Interval 5898 (2948500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9407 9 episodes - episode_reward: -166.125 [-196.939, -100.000] - loss: 13.512 - mae: 84.713 - mean_q: -112.169 Interval 5899 (2949000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1609 7 episodes - episode_reward: -159.095 [-273.886, -108.952] - loss: 14.176 - mae: 84.720 - mean_q: -112.158 Interval 5900 (2949500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9252 9 episodes - episode_reward: -158.554 [-212.382, -100.000] - loss: 9.265 - mae: 84.716 - mean_q: -112.169 Interval 5901 (2950000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5467 7 episodes - episode_reward: -176.380 [-230.905, -134.476] - loss: 13.002 - mae: 84.729 - mean_q: -112.160 Interval 5902 (2950500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5131 8 episodes - episode_reward: -159.145 [-189.898, -128.448] - loss: 8.728 - mae: 84.707 - mean_q: -112.170 Interval 5903 (2951000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7800 8 episodes - episode_reward: -170.909 [-235.248, -115.320] - loss: 12.929 - mae: 84.726 - mean_q: -112.176 Interval 5904 (2951500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9466 9 episodes - episode_reward: -170.955 [-230.640, -23.849] - loss: 9.979 - mae: 84.723 - mean_q: -112.173 Interval 5905 (2952000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7082 7 episodes - episode_reward: -183.800 [-268.351, -133.562] - loss: 10.972 - mae: 84.719 - mean_q: -112.192 Interval 5906 (2952500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9937 7 episodes - episode_reward: -203.196 [-292.004, -113.808] - loss: 11.379 - mae: 84.723 - mean_q: -112.213 Interval 5907 (2953000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2957 9 episodes - episode_reward: -199.034 [-362.207, -3.006] - loss: 10.359 - mae: 84.723 - mean_q: -112.212 Interval 5908 (2953500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6629 8 episodes - episode_reward: -165.877 [-232.505, 27.817] - loss: 10.944 - mae: 84.746 - mean_q: -112.219 Interval 5909 (2954000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4614 7 episodes - episode_reward: -166.588 [-220.297, -120.942] - loss: 10.891 - mae: 84.749 - mean_q: -112.240 Interval 5910 (2954500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6203 8 episodes - episode_reward: -166.214 [-211.431, -91.536] - loss: 13.254 - mae: 84.767 - mean_q: -112.258 Interval 5911 (2955000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4373 6 episodes - episode_reward: -201.544 [-287.426, -140.921] - loss: 12.645 - mae: 84.761 - mean_q: -112.243 Interval 5912 (2955500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7901 8 episodes - episode_reward: -180.893 [-212.297, -118.080] - loss: 10.357 - mae: 84.750 - mean_q: -112.273 Interval 5913 (2956000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2390 8 episodes - episode_reward: -202.230 [-337.847, -100.000] - loss: 11.361 - mae: 84.738 - mean_q: -112.293 Interval 5914 (2956500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2732 7 episodes - episode_reward: -156.515 [-230.138, 39.374] - loss: 13.155 - mae: 84.756 - mean_q: -112.284 Interval 5915 (2957000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9159 9 episodes - episode_reward: -158.045 [-247.004, -49.647] - loss: 12.495 - mae: 84.761 - mean_q: -112.258 Interval 5916 (2957500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7989 8 episodes - episode_reward: -183.405 [-232.520, -130.076] - loss: 13.084 - mae: 84.783 - mean_q: -112.235 Interval 5917 (2958000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9204 7 episodes - episode_reward: -200.786 [-290.388, -131.176] - loss: 14.202 - mae: 84.778 - mean_q: -112.215 Interval 5918 (2958500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0060 7 episodes - episode_reward: -147.078 [-234.708, 52.245] - loss: 12.875 - mae: 84.796 - mean_q: -112.210 Interval 5919 (2959000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6194 7 episodes - episode_reward: -191.942 [-225.808, -167.590] - loss: 11.904 - mae: 84.800 - mean_q: -112.194 Interval 5920 (2959500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3147 6 episodes - episode_reward: -178.458 [-238.512, -125.787] - loss: 12.648 - mae: 84.801 - mean_q: -112.161 Interval 5921 (2960000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7666 8 episodes - episode_reward: -184.272 [-283.776, -12.110] - loss: 13.499 - mae: 84.807 - mean_q: -112.176 Interval 5922 (2960500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0487 5 episodes - episode_reward: -176.345 [-229.622, -95.908] - loss: 13.029 - mae: 84.797 - mean_q: -112.167 Interval 5923 (2961000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5754 9 episodes - episode_reward: -152.411 [-229.208, -39.069] - loss: 10.420 - mae: 84.786 - mean_q: -112.176 Interval 5924 (2961500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7800 7 episodes - episode_reward: -202.141 [-319.250, -136.746] - loss: 12.532 - mae: 84.787 - mean_q: -112.185 Interval 5925 (2962000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2914 7 episodes - episode_reward: -167.970 [-280.674, 7.030] - loss: 10.596 - mae: 84.784 - mean_q: -112.165 Interval 5926 (2962500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6439 8 episodes - episode_reward: -169.611 [-255.985, -113.424] - loss: 11.021 - mae: 84.766 - mean_q: -112.164 Interval 5927 (2963000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6286 7 episodes - episode_reward: -174.160 [-239.359, -114.080] - loss: 12.836 - mae: 84.793 - mean_q: -112.162 Interval 5928 (2963500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8458 8 episodes - episode_reward: -176.627 [-289.775, -100.000] - loss: 13.090 - mae: 84.794 - mean_q: -112.139 Interval 5929 (2964000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9444 8 episodes - episode_reward: -184.881 [-247.175, -126.099] - loss: 12.923 - mae: 84.792 - mean_q: -112.097 Interval 5930 (2964500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4128 9 episodes - episode_reward: -193.398 [-228.512, -144.556] - loss: 13.080 - mae: 84.782 - mean_q: -112.071 Interval 5931 (2965000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4336 8 episodes - episode_reward: -151.401 [-283.168, 3.681] - loss: 9.442 - mae: 84.759 - mean_q: -112.063 Interval 5932 (2965500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5182 7 episodes - episode_reward: -180.148 [-373.693, -122.115] - loss: 12.388 - mae: 84.767 - mean_q: -112.066 Interval 5933 (2966000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9108 7 episodes - episode_reward: -217.089 [-330.777, -127.238] - loss: 7.949 - mae: 84.743 - mean_q: -112.050 Interval 5934 (2966500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0223 6 episodes - episode_reward: -227.105 [-369.702, -139.929] - loss: 13.495 - mae: 84.762 - mean_q: -112.033 Interval 5935 (2967000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3260 8 episodes - episode_reward: -219.495 [-258.908, -154.183] - loss: 11.311 - mae: 84.751 - mean_q: -112.030 Interval 5936 (2967500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4442 9 episodes - episode_reward: -186.781 [-279.762, -143.307] - loss: 13.246 - mae: 84.770 - mean_q: -112.033 Interval 5937 (2968000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8089 8 episodes - episode_reward: -182.987 [-287.119, 14.443] - loss: 13.811 - mae: 84.755 - mean_q: -112.026 Interval 5938 (2968500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6277 7 episodes - episode_reward: -188.123 [-255.029, -110.209] - loss: 12.328 - mae: 84.737 - mean_q: -112.014 Interval 5939 (2969000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.9404 7 episodes - episode_reward: -137.905 [-234.169, -14.599] - loss: 16.695 - mae: 84.750 - mean_q: -111.984 Interval 5940 (2969500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1084 7 episodes - episode_reward: -209.246 [-338.130, -106.889] - loss: 12.530 - mae: 84.737 - mean_q: -111.972 Interval 5941 (2970000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2165 9 episodes - episode_reward: -186.458 [-239.938, -134.300] - loss: 13.584 - mae: 84.735 - mean_q: -111.960 Interval 5942 (2970500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0468 8 episodes - episode_reward: -192.797 [-373.983, -80.171] - loss: 11.870 - mae: 84.715 - mean_q: -111.947 Interval 5943 (2971000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1354 6 episodes - episode_reward: -238.950 [-493.721, -117.128] - loss: 14.374 - mae: 84.703 - mean_q: -111.963 Interval 5944 (2971500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7095 8 episodes - episode_reward: -183.177 [-272.989, -137.070] - loss: 15.324 - mae: 84.667 - mean_q: -111.946 Interval 5945 (2972000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4509 10 episodes - episode_reward: -175.147 [-271.758, -100.539] - loss: 15.601 - mae: 84.631 - mean_q: -111.909 Interval 5946 (2972500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6918 8 episodes - episode_reward: -163.060 [-215.923, -97.414] - loss: 14.145 - mae: 84.581 - mean_q: -111.913 Interval 5947 (2973000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8379 8 episodes - episode_reward: -179.626 [-236.046, -61.534] - loss: 10.635 - mae: 84.518 - mean_q: -111.933 Interval 5948 (2973500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -4.6494 7 episodes - episode_reward: -336.441 [-714.039, -115.166] - loss: 12.698 - mae: 84.501 - mean_q: -111.930 Interval 5949 (2974000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.5877 2 episodes - episode_reward: -1012.858 [-1561.119, -464.597] - loss: 11.914 - mae: 84.489 - mean_q: -111.874 Interval 5950 (2974500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3432 6 episodes - episode_reward: -486.578 [-1058.313, -148.363] - loss: 11.134 - mae: 84.515 - mean_q: -111.912 Interval 5951 (2975000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0374 9 episodes - episode_reward: -169.644 [-266.886, -92.723] - loss: 12.345 - mae: 84.543 - mean_q: -111.921 Interval 5952 (2975500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8774 8 episodes - episode_reward: -174.659 [-222.282, -142.969] - loss: 13.281 - mae: 84.572 - mean_q: -111.921 Interval 5953 (2976000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7828 8 episodes - episode_reward: -171.742 [-263.956, 6.218] - loss: 18.082 - mae: 84.617 - mean_q: -111.891 Interval 5954 (2976500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7114 9 episodes - episode_reward: -155.761 [-259.233, 3.627] - loss: 11.219 - mae: 84.607 - mean_q: -111.882 Interval 5955 (2977000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2040 7 episodes - episode_reward: -149.164 [-202.786, -93.807] - loss: 9.628 - mae: 84.617 - mean_q: -111.912 Interval 5956 (2977500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9884 8 episodes - episode_reward: -190.633 [-266.810, -134.413] - loss: 9.084 - mae: 84.630 - mean_q: -111.936 Interval 5957 (2978000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0918 10 episodes - episode_reward: -158.937 [-207.280, -111.168] - loss: 13.743 - mae: 84.677 - mean_q: -111.943 Interval 5958 (2978500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6533 6 episodes - episode_reward: -132.045 [-229.811, 26.354] - loss: 14.575 - mae: 84.685 - mean_q: -111.929 Interval 5959 (2979000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9137 8 episodes - episode_reward: -167.952 [-219.745, -100.000] - loss: 14.315 - mae: 84.677 - mean_q: -111.895 Interval 5960 (2979500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6149 8 episodes - episode_reward: -178.943 [-260.355, -115.736] - loss: 14.310 - mae: 84.663 - mean_q: -111.876 Interval 5961 (2980000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8939 7 episodes - episode_reward: -209.451 [-338.379, -145.001] - loss: 11.985 - mae: 84.658 - mean_q: -111.883 Interval 5962 (2980500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6739 10 episodes - episode_reward: -183.684 [-242.374, -123.517] - loss: 11.746 - mae: 84.642 - mean_q: -111.869 Interval 5963 (2981000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1645 9 episodes - episode_reward: -177.028 [-235.382, -127.429] - loss: 15.723 - mae: 84.658 - mean_q: -111.856 Interval 5964 (2981500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9050 6 episodes - episode_reward: -240.837 [-500.565, -146.203] - loss: 12.616 - mae: 84.654 - mean_q: -111.836 Interval 5965 (2982000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6283 7 episodes - episode_reward: -182.981 [-257.223, -136.444] - loss: 13.116 - mae: 84.660 - mean_q: -111.845 Interval 5966 (2982500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1518 7 episodes - episode_reward: -157.944 [-250.269, 13.362] - loss: 15.823 - mae: 84.688 - mean_q: -111.808 Interval 5967 (2983000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1524 6 episodes - episode_reward: -179.846 [-278.778, -28.027] - loss: 11.660 - mae: 84.652 - mean_q: -111.788 Interval 5968 (2983500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2155 9 episodes - episode_reward: -172.005 [-246.596, -113.397] - loss: 11.589 - mae: 84.638 - mean_q: -111.785 Interval 5969 (2984000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1422 10 episodes - episode_reward: -156.812 [-221.447, -100.000] - loss: 11.091 - mae: 84.631 - mean_q: -111.811 Interval 5970 (2984500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8290 8 episodes - episode_reward: -184.535 [-220.253, -120.684] - loss: 11.888 - mae: 84.626 - mean_q: -111.831 Interval 5971 (2985000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7496 8 episodes - episode_reward: -162.670 [-203.314, -110.294] - loss: 11.428 - mae: 84.622 - mean_q: -111.847 Interval 5972 (2985500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5557 9 episodes - episode_reward: -202.375 [-304.313, -114.984] - loss: 10.830 - mae: 84.630 - mean_q: -111.847 Interval 5973 (2986000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4472 8 episodes - episode_reward: -162.087 [-356.572, -33.930] - loss: 9.757 - mae: 84.624 - mean_q: -111.845 Interval 5974 (2986500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5391 8 episodes - episode_reward: -153.638 [-217.207, -63.156] - loss: 12.687 - mae: 84.665 - mean_q: -111.834 Interval 5975 (2987000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0487 8 episodes - episode_reward: -191.050 [-238.992, -155.204] - loss: 13.281 - mae: 84.659 - mean_q: -111.809 Interval 5976 (2987500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6752 6 episodes - episode_reward: -212.827 [-249.522, -191.682] - loss: 11.437 - mae: 84.651 - mean_q: -111.799 Interval 5977 (2988000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4158 8 episodes - episode_reward: -217.879 [-380.926, -113.123] - loss: 15.209 - mae: 84.665 - mean_q: -111.772 Interval 5978 (2988500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7557 8 episodes - episode_reward: -174.240 [-224.736, -110.437] - loss: 14.069 - mae: 84.627 - mean_q: -111.764 Interval 5979 (2989000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3932 8 episodes - episode_reward: -139.990 [-299.829, -3.170] - loss: 10.185 - mae: 84.600 - mean_q: -111.761 Interval 5980 (2989500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1166 8 episodes - episode_reward: -205.899 [-273.751, -136.527] - loss: 10.109 - mae: 84.608 - mean_q: -111.767 Interval 5981 (2990000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1808 7 episodes - episode_reward: -155.616 [-237.967, 6.752] - loss: 13.295 - mae: 84.640 - mean_q: -111.771 Interval 5982 (2990500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4613 7 episodes - episode_reward: -174.082 [-243.605, -62.027] - loss: 16.381 - mae: 84.625 - mean_q: -111.739 Interval 5983 (2991000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5384 8 episodes - episode_reward: -158.128 [-197.233, -103.377] - loss: 11.987 - mae: 84.596 - mean_q: -111.744 Interval 5984 (2991500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.6082 8 episodes - episode_reward: -151.888 [-205.420, -95.407] - loss: 13.250 - mae: 84.594 - mean_q: -111.747 Interval 5985 (2992000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1545 7 episodes - episode_reward: -233.467 [-314.913, -161.745] - loss: 10.347 - mae: 84.573 - mean_q: -111.742 Interval 5986 (2992500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6511 6 episodes - episode_reward: -223.479 [-273.783, -180.459] - loss: 10.914 - mae: 84.574 - mean_q: -111.752 Interval 5987 (2993000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5157 7 episodes - episode_reward: -180.750 [-270.916, -134.334] - loss: 14.189 - mae: 84.578 - mean_q: -111.764 Interval 5988 (2993500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0401 9 episodes - episode_reward: -165.980 [-270.658, -100.000] - loss: 13.667 - mae: 84.583 - mean_q: -111.749 Interval 5989 (2994000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9494 8 episodes - episode_reward: -188.232 [-332.401, -128.326] - loss: 10.611 - mae: 84.564 - mean_q: -111.746 Interval 5990 (2994500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0595 8 episodes - episode_reward: -183.866 [-320.284, -57.046] - loss: 7.397 - mae: 84.538 - mean_q: -111.785 Interval 5991 (2995000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4839 8 episodes - episode_reward: -160.421 [-266.892, -84.772] - loss: 11.579 - mae: 84.567 - mean_q: -111.797 Interval 5992 (2995500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5657 7 episodes - episode_reward: -186.143 [-273.261, -140.287] - loss: 13.293 - mae: 84.567 - mean_q: -111.796 Interval 5993 (2996000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8374 7 episodes - episode_reward: -196.993 [-381.692, -96.166] - loss: 12.734 - mae: 84.573 - mean_q: -111.772 Interval 5994 (2996500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5583 6 episodes - episode_reward: -206.830 [-255.051, -171.182] - loss: 10.216 - mae: 84.560 - mean_q: -111.758 Interval 5995 (2997000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5863 9 episodes - episode_reward: -201.776 [-263.641, -159.876] - loss: 10.084 - mae: 84.546 - mean_q: -111.776 Interval 5996 (2997500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2175 9 episodes - episode_reward: -186.003 [-231.388, -100.000] - loss: 14.837 - mae: 84.561 - mean_q: -111.771 Interval 5997 (2998000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3240 9 episodes - episode_reward: -184.042 [-261.603, -100.000] - loss: 11.537 - mae: 84.551 - mean_q: -111.778 Interval 5998 (2998500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6089 7 episodes - episode_reward: -186.637 [-235.614, -147.104] - loss: 11.291 - mae: 84.550 - mean_q: -111.794 Interval 5999 (2999000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.4522 7 episodes - episode_reward: -104.980 [-183.835, 38.824] - loss: 13.609 - mae: 84.546 - mean_q: -111.774 Interval 6000 (2999500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8271 8 episodes - episode_reward: -174.636 [-222.052, -141.553] - loss: 12.207 - mae: 84.527 - mean_q: -111.797 Interval 6001 (3000000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8497 7 episodes - episode_reward: -119.315 [-199.403, 27.395] - loss: 13.882 - mae: 84.534 - mean_q: -111.800 Interval 6002 (3000500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1647 7 episodes - episode_reward: -160.092 [-224.847, 57.337] - loss: 10.990 - mae: 84.517 - mean_q: -111.805 Interval 6003 (3001000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4424 11 episodes - episode_reward: -160.865 [-226.096, -100.000] - loss: 11.575 - mae: 84.520 - mean_q: -111.815 Interval 6004 (3001500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2013 7 episodes - episode_reward: -156.093 [-241.195, -90.744] - loss: 10.107 - mae: 84.507 - mean_q: -111.827 Interval 6005 (3002000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0553 8 episodes - episode_reward: -193.077 [-243.011, -100.000] - loss: 18.715 - mae: 84.542 - mean_q: -111.820 Interval 6006 (3002500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7031 9 episodes - episode_reward: -154.205 [-225.141, -82.691] - loss: 11.646 - mae: 84.513 - mean_q: -111.784 Interval 6007 (3003000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1417 9 episodes - episode_reward: -165.129 [-200.883, -128.605] - loss: 11.985 - mae: 84.526 - mean_q: -111.773 Interval 6008 (3003500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6825 8 episodes - episode_reward: -176.353 [-259.166, 20.351] - loss: 10.531 - mae: 84.529 - mean_q: -111.778 Interval 6009 (3004000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4440 6 episodes - episode_reward: -192.423 [-326.157, -114.782] - loss: 15.358 - mae: 84.548 - mean_q: -111.785 Interval 6010 (3004500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5409 10 episodes - episode_reward: -178.752 [-208.032, -137.939] - loss: 15.634 - mae: 84.545 - mean_q: -111.764 Interval 6011 (3005000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7454 6 episodes - episode_reward: -235.550 [-282.019, -200.785] - loss: 14.927 - mae: 84.538 - mean_q: -111.748 Interval 6012 (3005500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5646 11 episodes - episode_reward: -164.121 [-239.382, -100.000] - loss: 13.001 - mae: 84.528 - mean_q: -111.737 Interval 6013 (3006000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8084 8 episodes - episode_reward: -171.704 [-236.775, -127.585] - loss: 14.981 - mae: 84.537 - mean_q: -111.713 Interval 6014 (3006500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3856 8 episodes - episode_reward: -149.564 [-295.848, 14.339] - loss: 20.758 - mae: 84.547 - mean_q: -111.669 Interval 6015 (3007000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1782 6 episodes - episode_reward: -186.902 [-250.046, -95.980] - loss: 15.361 - mae: 84.517 - mean_q: -111.649 Interval 6016 (3007500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4236 7 episodes - episode_reward: -163.082 [-263.573, 30.956] - loss: 13.846 - mae: 84.495 - mean_q: -111.652 Interval 6017 (3008000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8430 7 episodes - episode_reward: -206.586 [-381.030, 11.416] - loss: 12.365 - mae: 84.484 - mean_q: -111.630 Interval 6018 (3008500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5963 8 episodes - episode_reward: -155.821 [-206.815, -100.000] - loss: 13.210 - mae: 84.466 - mean_q: -111.637 Interval 6019 (3009000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6916 6 episodes - episode_reward: -224.156 [-402.867, -167.664] - loss: 16.330 - mae: 84.473 - mean_q: -111.628 Interval 6020 (3009500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.5380 8 episodes - episode_reward: -227.002 [-383.602, -105.473] - loss: 10.938 - mae: 84.431 - mean_q: -111.601 Interval 6021 (3010000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2423 10 episodes - episode_reward: -160.560 [-246.393, -75.498] - loss: 15.222 - mae: 84.441 - mean_q: -111.611 Interval 6022 (3010500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5636 8 episodes - episode_reward: -160.785 [-245.174, -100.000] - loss: 11.446 - mae: 84.420 - mean_q: -111.602 Interval 6023 (3011000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7195 7 episodes - episode_reward: -187.011 [-252.070, -126.374] - loss: 14.895 - mae: 84.431 - mean_q: -111.598 Interval 6024 (3011500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4528 8 episodes - episode_reward: -161.765 [-243.591, -47.533] - loss: 12.650 - mae: 84.409 - mean_q: -111.586 Interval 6025 (3012000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7916 7 episodes - episode_reward: -186.760 [-236.323, -126.338] - loss: 12.711 - mae: 84.398 - mean_q: -111.565 Interval 6026 (3012500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6970 7 episodes - episode_reward: -202.812 [-245.677, -150.370] - loss: 10.512 - mae: 84.379 - mean_q: -111.584 Interval 6027 (3013000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7005 7 episodes - episode_reward: -197.902 [-275.492, -137.514] - loss: 14.867 - mae: 84.391 - mean_q: -111.588 Interval 6028 (3013500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9108 8 episodes - episode_reward: -173.341 [-255.546, -111.671] - loss: 9.188 - mae: 84.368 - mean_q: -111.575 Interval 6029 (3014000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9432 8 episodes - episode_reward: -192.338 [-268.699, -90.331] - loss: 11.341 - mae: 84.394 - mean_q: -111.596 Interval 6030 (3014500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7914 7 episodes - episode_reward: -191.967 [-259.852, -135.636] - loss: 14.073 - mae: 84.403 - mean_q: -111.587 Interval 6031 (3015000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9146 8 episodes - episode_reward: -182.877 [-308.945, -100.000] - loss: 15.685 - mae: 84.397 - mean_q: -111.577 Interval 6032 (3015500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5598 7 episodes - episode_reward: -178.515 [-289.158, 5.487] - loss: 14.346 - mae: 84.384 - mean_q: -111.559 Interval 6033 (3016000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5316 9 episodes - episode_reward: -193.412 [-429.185, -100.000] - loss: 17.426 - mae: 84.390 - mean_q: -111.550 Interval 6034 (3016500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6266 8 episodes - episode_reward: -171.236 [-249.314, -52.071] - loss: 13.575 - mae: 84.378 - mean_q: -111.550 Interval 6035 (3017000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1480 10 episodes - episode_reward: -157.550 [-217.109, -119.323] - loss: 14.968 - mae: 84.380 - mean_q: -111.546 Interval 6036 (3017500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8904 6 episodes - episode_reward: -165.353 [-281.351, 4.339] - loss: 14.176 - mae: 84.367 - mean_q: -111.543 Interval 6037 (3018000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2748 7 episodes - episode_reward: -233.778 [-357.214, -169.773] - loss: 9.621 - mae: 84.353 - mean_q: -111.546 Interval 6038 (3018500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9130 8 episodes - episode_reward: -179.178 [-307.630, -118.396] - loss: 15.805 - mae: 84.379 - mean_q: -111.547 Interval 6039 (3019000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1428 7 episodes - episode_reward: -153.111 [-228.587, 1.223] - loss: 11.846 - mae: 84.358 - mean_q: -111.557 Interval 6040 (3019500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.5633 8 episodes - episode_reward: -350.457 [-508.597, -218.054] - loss: 12.854 - mae: 84.339 - mean_q: -111.575 Interval 6041 (3020000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1211 8 episodes - episode_reward: -251.339 [-501.446, -120.810] - loss: 13.726 - mae: 84.371 - mean_q: -111.578 Interval 6042 (3020500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3127 10 episodes - episode_reward: -166.853 [-270.590, -100.000] - loss: 11.948 - mae: 84.360 - mean_q: -111.576 Interval 6043 (3021000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4212 9 episodes - episode_reward: -245.796 [-727.456, -100.000] - loss: 11.908 - mae: 84.373 - mean_q: -111.588 Interval 6044 (3021500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -7.3913 9 episodes - episode_reward: -407.572 [-919.277, -126.867] - loss: 9.129 - mae: 84.409 - mean_q: -111.609 Interval 6045 (3022000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3456 8 episodes - episode_reward: -211.972 [-265.277, -173.689] - loss: 16.104 - mae: 84.491 - mean_q: -111.624 Interval 6046 (3022500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7947 6 episodes - episode_reward: -226.244 [-279.410, -197.801] - loss: 15.204 - mae: 84.524 - mean_q: -111.593 Interval 6047 (3023000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8455 7 episodes - episode_reward: -204.238 [-227.389, -172.627] - loss: 15.785 - mae: 84.526 - mean_q: -111.577 Interval 6048 (3023500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9323 8 episodes - episode_reward: -188.286 [-267.205, -137.159] - loss: 11.744 - mae: 84.517 - mean_q: -111.601 Interval 6049 (3024000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6022 8 episodes - episode_reward: -166.796 [-243.220, -85.793] - loss: 13.353 - mae: 84.523 - mean_q: -111.611 Interval 6050 (3024500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2415 8 episodes - episode_reward: -202.116 [-265.663, -118.566] - loss: 14.389 - mae: 84.501 - mean_q: -111.627 Interval 6051 (3025000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5338 7 episodes - episode_reward: -181.549 [-242.348, -31.257] - loss: 11.890 - mae: 84.471 - mean_q: -111.643 Interval 6052 (3025500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7441 8 episodes - episode_reward: -164.415 [-203.541, -121.088] - loss: 16.695 - mae: 84.475 - mean_q: -111.638 Interval 6053 (3026000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7166 7 episodes - episode_reward: -199.568 [-278.773, -146.717] - loss: 13.898 - mae: 84.446 - mean_q: -111.632 Interval 6054 (3026500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6154 7 episodes - episode_reward: -181.103 [-292.133, 35.219] - loss: 14.804 - mae: 84.438 - mean_q: -111.634 Interval 6055 (3027000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1992 5 episodes - episode_reward: -375.770 [-834.178, -150.621] - loss: 15.258 - mae: 84.422 - mean_q: -111.602 Interval 6056 (3027500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -6.0159 7 episodes - episode_reward: -439.436 [-600.094, -100.000] - loss: 10.324 - mae: 84.395 - mean_q: -111.564 Interval 6057 (3028000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9912 5 episodes - episode_reward: -424.918 [-897.861, -128.780] - loss: 14.407 - mae: 84.453 - mean_q: -111.613 Interval 6058 (3028500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3888 8 episodes - episode_reward: -162.210 [-210.857, 5.135] - loss: 10.821 - mae: 84.491 - mean_q: -111.640 Interval 6059 (3029000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5705 7 episodes - episode_reward: -177.779 [-330.974, -34.436] - loss: 14.787 - mae: 84.541 - mean_q: -111.637 Interval 6060 (3029500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0089 12 episodes - episode_reward: -168.596 [-284.670, -100.000] - loss: 12.369 - mae: 84.572 - mean_q: -111.658 Interval 6061 (3030000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8969 7 episodes - episode_reward: -126.602 [-200.593, 35.927] - loss: 15.322 - mae: 84.611 - mean_q: -111.658 Interval 6062 (3030500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1456 8 episodes - episode_reward: -202.525 [-258.716, -39.887] - loss: 11.606 - mae: 84.626 - mean_q: -111.678 Interval 6063 (3031000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1620 7 episodes - episode_reward: -152.565 [-203.206, -71.855] - loss: 10.736 - mae: 84.645 - mean_q: -111.686 Interval 6064 (3031500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0789 8 episodes - episode_reward: -195.822 [-248.855, -149.224] - loss: 12.423 - mae: 84.674 - mean_q: -111.710 Interval 6065 (3032000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3706 7 episodes - episode_reward: -160.619 [-218.985, -100.000] - loss: 17.305 - mae: 84.708 - mean_q: -111.723 Interval 6066 (3032500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9037 8 episodes - episode_reward: -178.546 [-277.021, -17.588] - loss: 11.975 - mae: 84.676 - mean_q: -111.723 Interval 6067 (3033000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6780 8 episodes - episode_reward: -234.025 [-375.075, -145.132] - loss: 15.387 - mae: 84.706 - mean_q: -111.704 Interval 6068 (3033500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5083 7 episodes - episode_reward: -175.685 [-229.896, -100.000] - loss: 13.249 - mae: 84.713 - mean_q: -111.718 Interval 6069 (3034000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8715 8 episodes - episode_reward: -179.778 [-204.032, -163.743] - loss: 12.804 - mae: 84.745 - mean_q: -111.739 Interval 6070 (3034500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9667 9 episodes - episode_reward: -167.122 [-226.857, -24.132] - loss: 14.045 - mae: 84.743 - mean_q: -111.750 Interval 6071 (3035000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1146 7 episodes - episode_reward: -153.618 [-227.673, -3.394] - loss: 15.074 - mae: 84.771 - mean_q: -111.774 Interval 6072 (3035500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3766 6 episodes - episode_reward: -194.273 [-261.275, -144.214] - loss: 13.915 - mae: 84.768 - mean_q: -111.796 Interval 6073 (3036000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4784 6 episodes - episode_reward: -206.585 [-283.284, -128.155] - loss: 12.966 - mae: 84.761 - mean_q: -111.811 Interval 6074 (3036500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9338 9 episodes - episode_reward: -168.595 [-204.562, -142.972] - loss: 13.789 - mae: 84.774 - mean_q: -111.822 Interval 6075 (3037000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0035 7 episodes - episode_reward: -207.882 [-317.407, -111.271] - loss: 11.714 - mae: 84.764 - mean_q: -111.847 Interval 6076 (3037500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3028 11 episodes - episode_reward: -154.764 [-220.745, -62.640] - loss: 12.699 - mae: 84.776 - mean_q: -111.891 Interval 6077 (3038000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4767 6 episodes - episode_reward: -200.127 [-283.163, -144.329] - loss: 12.556 - mae: 84.789 - mean_q: -111.909 Interval 6078 (3038500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7615 9 episodes - episode_reward: -155.676 [-209.912, -107.455] - loss: 12.423 - mae: 84.800 - mean_q: -111.911 Interval 6079 (3039000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1071 8 episodes - episode_reward: -189.814 [-264.788, -118.181] - loss: 14.558 - mae: 84.833 - mean_q: -111.904 Interval 6080 (3039500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4554 10 episodes - episode_reward: -178.858 [-245.262, -149.255] - loss: 13.916 - mae: 84.858 - mean_q: -111.884 Interval 6081 (3040000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2675 6 episodes - episode_reward: -181.320 [-248.679, -109.666] - loss: 12.837 - mae: 84.871 - mean_q: -111.890 Interval 6082 (3040500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6730 7 episodes - episode_reward: -180.679 [-236.362, -117.573] - loss: 14.127 - mae: 84.901 - mean_q: -111.880 Interval 6083 (3041000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1042 9 episodes - episode_reward: -180.554 [-319.454, -100.000] - loss: 11.034 - mae: 84.882 - mean_q: -111.890 Interval 6084 (3041500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3430 9 episodes - episode_reward: -130.679 [-205.382, 4.796] - loss: 11.816 - mae: 84.889 - mean_q: -111.921 Interval 6085 (3042000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4378 10 episodes - episode_reward: -173.200 [-276.887, -107.507] - loss: 13.571 - mae: 84.915 - mean_q: -111.941 Interval 6086 (3042500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5575 8 episodes - episode_reward: -156.117 [-254.336, -4.777] - loss: 13.616 - mae: 84.931 - mean_q: -111.916 Interval 6087 (3043000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8370 7 episodes - episode_reward: -201.086 [-280.652, -154.405] - loss: 11.175 - mae: 84.926 - mean_q: -111.920 Interval 6088 (3043500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2230 7 episodes - episode_reward: -166.172 [-277.954, -19.101] - loss: 11.846 - mae: 84.945 - mean_q: -111.906 Interval 6089 (3044000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9011 8 episodes - episode_reward: -179.248 [-241.716, -115.270] - loss: 15.320 - mae: 84.973 - mean_q: -111.935 Interval 6090 (3044500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8524 8 episodes - episode_reward: -181.114 [-251.427, -143.149] - loss: 13.434 - mae: 84.972 - mean_q: -111.939 Interval 6091 (3045000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1802 8 episodes - episode_reward: -189.887 [-235.267, -111.979] - loss: 10.977 - mae: 84.970 - mean_q: -111.928 Interval 6092 (3045500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8113 7 episodes - episode_reward: -206.485 [-311.473, -142.291] - loss: 14.029 - mae: 85.004 - mean_q: -111.945 Interval 6093 (3046000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4947 7 episodes - episode_reward: -174.879 [-257.425, -115.625] - loss: 12.274 - mae: 84.986 - mean_q: -111.946 Interval 6094 (3046500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3912 9 episodes - episode_reward: -139.310 [-194.490, -21.789] - loss: 14.692 - mae: 85.004 - mean_q: -111.936 Interval 6095 (3047000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5395 7 episodes - episode_reward: -166.630 [-227.843, -119.080] - loss: 12.312 - mae: 84.996 - mean_q: -111.876 Interval 6096 (3047500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0851 8 episodes - episode_reward: -204.165 [-240.590, -148.263] - loss: 11.695 - mae: 84.984 - mean_q: -111.901 Interval 6097 (3048000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0232 8 episodes - episode_reward: -189.908 [-331.361, -134.274] - loss: 11.581 - mae: 84.995 - mean_q: -111.886 Interval 6098 (3048500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3944 9 episodes - episode_reward: -129.132 [-184.281, 50.757] - loss: 10.437 - mae: 84.981 - mean_q: -111.875 Interval 6099 (3049000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8994 7 episodes - episode_reward: -212.642 [-316.233, -139.557] - loss: 12.870 - mae: 85.001 - mean_q: -111.892 Interval 6100 (3049500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0957 9 episodes - episode_reward: -168.894 [-211.289, -130.960] - loss: 15.372 - mae: 84.995 - mean_q: -111.871 Interval 6101 (3050000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3254 9 episodes - episode_reward: -183.387 [-250.217, -125.991] - loss: 11.965 - mae: 84.987 - mean_q: -111.899 Interval 6102 (3050500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8131 7 episodes - episode_reward: -198.768 [-285.020, -159.487] - loss: 14.144 - mae: 84.997 - mean_q: -111.927 Interval 6103 (3051000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1710 9 episodes - episode_reward: -173.516 [-235.324, -100.000] - loss: 11.220 - mae: 84.979 - mean_q: -111.924 Interval 6104 (3051500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6110 7 episodes - episode_reward: -196.326 [-255.835, -148.717] - loss: 12.555 - mae: 84.981 - mean_q: -111.953 Interval 6105 (3052000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1231 8 episodes - episode_reward: -195.891 [-259.385, -143.699] - loss: 12.934 - mae: 84.998 - mean_q: -111.941 Interval 6106 (3052500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1809 7 episodes - episode_reward: -143.457 [-319.305, 92.950] - loss: 11.683 - mae: 85.005 - mean_q: -111.955 Interval 6107 (3053000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9933 10 episodes - episode_reward: -158.447 [-326.409, 14.072] - loss: 13.665 - mae: 85.020 - mean_q: -111.979 Interval 6108 (3053500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8718 8 episodes - episode_reward: -175.795 [-278.875, -104.953] - loss: 10.912 - mae: 85.014 - mean_q: -111.979 Interval 6109 (3054000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.2959 7 episodes - episode_reward: -98.354 [-163.889, -24.465] - loss: 15.537 - mae: 85.039 - mean_q: -111.992 Interval 6110 (3054500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4927 9 episodes - episode_reward: -182.840 [-364.448, -100.000] - loss: 13.014 - mae: 85.041 - mean_q: -112.004 Interval 6111 (3055000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5714 6 episodes - episode_reward: -207.852 [-270.200, -182.778] - loss: 13.393 - mae: 85.049 - mean_q: -112.019 Interval 6112 (3055500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6164 7 episodes - episode_reward: -200.680 [-305.368, -137.346] - loss: 15.908 - mae: 85.062 - mean_q: -112.025 Interval 6113 (3056000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8635 8 episodes - episode_reward: -172.490 [-225.153, -120.482] - loss: 14.646 - mae: 85.055 - mean_q: -112.020 Interval 6114 (3056500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2023 11 episodes - episode_reward: -149.540 [-196.840, -71.773] - loss: 12.991 - mae: 85.082 - mean_q: -112.026 Interval 6115 (3057000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2906 10 episodes - episode_reward: -168.597 [-245.273, -94.082] - loss: 10.352 - mae: 85.069 - mean_q: -112.036 Interval 6116 (3057500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7698 6 episodes - episode_reward: -214.854 [-302.438, -162.840] - loss: 12.260 - mae: 85.078 - mean_q: -112.051 Interval 6117 (3058000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2763 7 episodes - episode_reward: -237.441 [-358.716, -155.161] - loss: 12.866 - mae: 85.103 - mean_q: -112.062 Interval 6118 (3058500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7219 10 episodes - episode_reward: -188.936 [-266.528, -100.000] - loss: 11.795 - mae: 85.101 - mean_q: -112.079 Interval 6119 (3059000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2100 7 episodes - episode_reward: -152.288 [-241.254, -25.122] - loss: 11.223 - mae: 85.094 - mean_q: -112.115 Interval 6120 (3059500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3359 9 episodes - episode_reward: -195.103 [-241.503, -125.659] - loss: 14.769 - mae: 85.108 - mean_q: -112.143 Interval 6121 (3060000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7498 7 episodes - episode_reward: -186.297 [-245.488, -122.684] - loss: 12.350 - mae: 85.118 - mean_q: -112.167 Interval 6122 (3060500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9011 9 episodes - episode_reward: -219.603 [-366.343, -100.586] - loss: 13.202 - mae: 85.129 - mean_q: -112.178 Interval 6123 (3061000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5868 7 episodes - episode_reward: -183.539 [-304.504, 17.787] - loss: 16.530 - mae: 85.148 - mean_q: -112.172 Interval 6124 (3061500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1805 7 episodes - episode_reward: -230.359 [-374.129, -159.838] - loss: 11.995 - mae: 85.125 - mean_q: -112.170 Interval 6125 (3062000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2065 9 episodes - episode_reward: -181.880 [-304.162, -110.691] - loss: 13.175 - mae: 85.162 - mean_q: -112.167 Interval 6126 (3062500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2974 8 episodes - episode_reward: -205.793 [-251.134, -111.940] - loss: 10.306 - mae: 85.160 - mean_q: -112.201 Interval 6127 (3063000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5963 7 episodes - episode_reward: -164.793 [-232.111, -99.757] - loss: 15.439 - mae: 85.198 - mean_q: -112.218 Interval 6128 (3063500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1216 7 episodes - episode_reward: -168.668 [-256.152, 0.969] - loss: 12.439 - mae: 85.184 - mean_q: -112.221 Interval 6129 (3064000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7351 10 episodes - episode_reward: -185.975 [-264.400, -150.331] - loss: 15.786 - mae: 85.197 - mean_q: -112.239 Interval 6130 (3064500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6524 9 episodes - episode_reward: -143.655 [-212.075, -100.000] - loss: 14.247 - mae: 85.211 - mean_q: -112.230 Interval 6131 (3065000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7116 8 episodes - episode_reward: -174.135 [-218.892, -139.229] - loss: 13.014 - mae: 85.217 - mean_q: -112.225 Interval 6132 (3065500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8673 8 episodes - episode_reward: -179.697 [-249.248, -102.909] - loss: 13.871 - mae: 85.242 - mean_q: -112.215 Interval 6133 (3066000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7564 6 episodes - episode_reward: -221.514 [-273.324, -158.052] - loss: 11.306 - mae: 85.214 - mean_q: -112.221 Interval 6134 (3066500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3845 9 episodes - episode_reward: -189.148 [-252.018, -123.738] - loss: 10.024 - mae: 85.223 - mean_q: -112.261 Interval 6135 (3067000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6527 10 episodes - episode_reward: -189.007 [-249.818, -150.474] - loss: 11.823 - mae: 85.243 - mean_q: -112.271 Interval 6136 (3067500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5490 7 episodes - episode_reward: -180.260 [-258.186, -88.403] - loss: 12.336 - mae: 85.238 - mean_q: -112.304 Interval 6137 (3068000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7678 8 episodes - episode_reward: -174.410 [-272.563, -118.912] - loss: 12.141 - mae: 85.259 - mean_q: -112.292 Interval 6138 (3068500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0978 9 episodes - episode_reward: -166.118 [-257.958, -100.000] - loss: 11.473 - mae: 85.253 - mean_q: -112.311 Interval 6139 (3069000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1785 9 episodes - episode_reward: -182.472 [-299.505, -112.036] - loss: 12.572 - mae: 85.268 - mean_q: -112.331 Interval 6140 (3069500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7776 6 episodes - episode_reward: -138.774 [-332.040, 28.537] - loss: 9.580 - mae: 85.245 - mean_q: -112.347 Interval 6141 (3070000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6873 8 episodes - episode_reward: -169.172 [-222.915, -110.752] - loss: 11.592 - mae: 85.237 - mean_q: -112.365 Interval 6142 (3070500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7726 10 episodes - episode_reward: -185.437 [-253.758, -100.000] - loss: 12.495 - mae: 85.231 - mean_q: -112.373 Interval 6143 (3071000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6996 8 episodes - episode_reward: -168.703 [-287.971, -9.976] - loss: 14.938 - mae: 85.225 - mean_q: -112.357 Interval 6144 (3071500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9730 8 episodes - episode_reward: -193.122 [-325.030, -124.057] - loss: 11.802 - mae: 85.179 - mean_q: -112.339 Interval 6145 (3072000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9240 7 episodes - episode_reward: -203.486 [-294.815, -138.853] - loss: 9.737 - mae: 85.145 - mean_q: -112.344 Interval 6146 (3072500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1727 6 episodes - episode_reward: -185.582 [-258.493, -116.795] - loss: 14.581 - mae: 85.130 - mean_q: -112.350 Interval 6147 (3073000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3416 8 episodes - episode_reward: -148.014 [-207.328, -55.926] - loss: 16.417 - mae: 85.102 - mean_q: -112.317 Interval 6148 (3073500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5126 6 episodes - episode_reward: -198.007 [-299.445, -59.487] - loss: 12.019 - mae: 85.056 - mean_q: -112.303 Interval 6149 (3074000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8166 8 episodes - episode_reward: -186.626 [-375.119, -107.169] - loss: 12.914 - mae: 85.053 - mean_q: -112.293 Interval 6150 (3074500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9816 8 episodes - episode_reward: -183.559 [-258.594, -131.380] - loss: 12.989 - mae: 85.024 - mean_q: -112.282 Interval 6151 (3075000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0727 7 episodes - episode_reward: -215.280 [-290.795, -184.762] - loss: 13.646 - mae: 85.004 - mean_q: -112.258 Interval 6152 (3075500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8555 9 episodes - episode_reward: -164.968 [-271.359, -51.726] - loss: 10.344 - mae: 84.966 - mean_q: -112.251 Interval 6153 (3076000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2006 7 episodes - episode_reward: -161.923 [-183.026, -140.911] - loss: 11.039 - mae: 84.947 - mean_q: -112.254 Interval 6154 (3076500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0314 8 episodes - episode_reward: -180.983 [-284.564, -100.000] - loss: 11.548 - mae: 84.932 - mean_q: -112.251 Interval 6155 (3077000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5986 9 episodes - episode_reward: -142.964 [-235.240, 67.357] - loss: 14.541 - mae: 84.884 - mean_q: -112.249 Interval 6156 (3077500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9936 10 episodes - episode_reward: -154.530 [-190.160, -100.625] - loss: 14.321 - mae: 84.842 - mean_q: -112.241 Interval 6157 (3078000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4733 7 episodes - episode_reward: -170.832 [-203.327, -113.575] - loss: 15.685 - mae: 84.799 - mean_q: -112.190 Interval 6158 (3078500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.4187 11 episodes - episode_reward: -248.691 [-1153.780, -106.485] - loss: 14.881 - mae: 84.738 - mean_q: -112.170 Interval 6159 (3079000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.7855 8 episodes - episode_reward: -425.305 [-1038.081, -154.723] - loss: 11.460 - mae: 84.696 - mean_q: -112.164 Interval 6160 (3079500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9163 8 episodes - episode_reward: -182.297 [-237.596, -100.000] - loss: 10.504 - mae: 84.685 - mean_q: -112.181 Interval 6161 (3080000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4852 8 episodes - episode_reward: -211.654 [-254.067, -168.834] - loss: 12.222 - mae: 84.685 - mean_q: -112.176 Interval 6162 (3080500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -3.1334 8 episodes - episode_reward: -199.787 [-321.189, -133.983] - loss: 12.037 - mae: 84.662 - mean_q: -112.184 Interval 6163 (3081000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0938 9 episodes - episode_reward: -167.749 [-241.400, -117.488] - loss: 12.318 - mae: 84.648 - mean_q: -112.199 Interval 6164 (3081500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -4.8207 9 episodes - episode_reward: -265.499 [-671.925, -68.682] - loss: 8.295 - mae: 84.607 - mean_q: -112.208 Interval 6165 (3082000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.7915 6 episodes - episode_reward: -339.637 [-457.349, -255.461] - loss: 12.047 - mae: 84.638 - mean_q: -112.184 Interval 6166 (3082500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.4744 6 episodes - episode_reward: -443.606 [-1623.916, -169.979] - loss: 12.670 - mae: 84.657 - mean_q: -112.187 Interval 6167 (3083000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4531 10 episodes - episode_reward: -174.537 [-243.889, -100.000] - loss: 13.318 - mae: 84.686 - mean_q: -112.165 Interval 6168 (3083500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1014 8 episodes - episode_reward: -191.866 [-368.757, -137.081] - loss: 12.944 - mae: 84.709 - mean_q: -112.174 Interval 6169 (3084000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0723 8 episodes - episode_reward: -192.122 [-314.255, -134.001] - loss: 10.745 - mae: 84.722 - mean_q: -112.175 Interval 6170 (3084500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1306 8 episodes - episode_reward: -189.003 [-302.010, -81.084] - loss: 11.068 - mae: 84.747 - mean_q: -112.202 Interval 6171 (3085000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0319 8 episodes - episode_reward: -185.786 [-277.833, -125.311] - loss: 11.661 - mae: 84.772 - mean_q: -112.196 Interval 6172 (3085500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0592 9 episodes - episode_reward: -179.711 [-302.588, -99.049] - loss: 11.987 - mae: 84.802 - mean_q: -112.204 Interval 6173 (3086000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3431 7 episodes - episode_reward: -154.991 [-269.234, -84.214] - loss: 9.016 - mae: 84.809 - mean_q: -112.213 Interval 6174 (3086500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3318 8 episodes - episode_reward: -153.412 [-218.687, -9.218] - loss: 11.609 - mae: 84.838 - mean_q: -112.234 Interval 6175 (3087000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9792 7 episodes - episode_reward: -205.755 [-303.625, -95.501] - loss: 11.369 - mae: 84.853 - mean_q: -112.247 Interval 6176 (3087500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0499 8 episodes - episode_reward: -200.403 [-264.933, -157.813] - loss: 11.537 - mae: 84.884 - mean_q: -112.258 Interval 6177 (3088000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8778 7 episodes - episode_reward: -202.625 [-353.317, -80.295] - loss: 10.379 - mae: 84.899 - mean_q: -112.256 Interval 6178 (3088500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0774 7 episodes - episode_reward: -153.105 [-213.519, -0.009] - loss: 10.581 - mae: 84.915 - mean_q: -112.271 Interval 6179 (3089000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6130 6 episodes - episode_reward: -203.593 [-236.125, -175.162] - loss: 10.322 - mae: 84.931 - mean_q: -112.305 Interval 6180 (3089500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3980 6 episodes - episode_reward: -192.474 [-314.375, -59.714] - loss: 13.326 - mae: 84.966 - mean_q: -112.310 Interval 6181 (3090000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9947 7 episodes - episode_reward: -219.531 [-360.275, -122.884] - loss: 12.420 - mae: 84.979 - mean_q: -112.313 Interval 6182 (3090500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.5655 9 episodes - episode_reward: -147.104 [-230.418, -27.257] - loss: 13.119 - mae: 84.998 - mean_q: -112.317 Interval 6183 (3091000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3254 10 episodes - episode_reward: -169.668 [-235.033, -100.000] - loss: 13.353 - mae: 85.012 - mean_q: -112.318 Interval 6184 (3091500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1109 10 episodes - episode_reward: -155.545 [-235.147, -100.000] - loss: 14.067 - mae: 85.031 - mean_q: -112.313 Interval 6185 (3092000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9806 8 episodes - episode_reward: -183.651 [-214.263, -150.228] - loss: 14.727 - mae: 85.052 - mean_q: -112.308 Interval 6186 (3092500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6186 8 episodes - episode_reward: -164.573 [-223.902, -85.857] - loss: 10.772 - mae: 85.047 - mean_q: -112.306 Interval 6187 (3093000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6362 7 episodes - episode_reward: -189.341 [-247.986, -130.462] - loss: 14.922 - mae: 85.074 - mean_q: -112.307 Interval 6188 (3093500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8654 8 episodes - episode_reward: -173.542 [-231.360, -100.000] - loss: 13.181 - mae: 85.063 - mean_q: -112.302 Interval 6189 (3094000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3237 6 episodes - episode_reward: -187.738 [-271.155, -85.988] - loss: 12.481 - mae: 85.073 - mean_q: -112.305 Interval 6190 (3094500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9364 8 episodes - episode_reward: -184.959 [-229.269, -149.578] - loss: 12.152 - mae: 85.080 - mean_q: -112.334 Interval 6191 (3095000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7483 8 episodes - episode_reward: -180.955 [-404.857, -9.291] - loss: 15.451 - mae: 85.102 - mean_q: -112.338 Interval 6192 (3095500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9831 8 episodes - episode_reward: -185.373 [-255.483, -131.900] - loss: 11.857 - mae: 85.084 - mean_q: -112.332 Interval 6193 (3096000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1003 9 episodes - episode_reward: -175.209 [-228.824, -116.156] - loss: 11.110 - mae: 85.108 - mean_q: -112.349 Interval 6194 (3096500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8218 8 episodes - episode_reward: -172.004 [-252.304, -99.058] - loss: 11.852 - mae: 85.105 - mean_q: -112.383 Interval 6195 (3097000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3926 6 episodes - episode_reward: -209.719 [-278.830, -152.612] - loss: 13.077 - mae: 85.123 - mean_q: -112.402 Interval 6196 (3097500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7937 8 episodes - episode_reward: -168.892 [-226.361, -113.029] - loss: 10.531 - mae: 85.149 - mean_q: -112.412 Interval 6197 (3098000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5121 7 episodes - episode_reward: -169.346 [-262.883, -108.877] - loss: 9.531 - mae: 85.147 - mean_q: -112.454 Interval 6198 (3098500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5978 7 episodes - episode_reward: -181.693 [-337.842, -119.105] - loss: 15.121 - mae: 85.187 - mean_q: -112.457 Interval 6199 (3099000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9983 8 episodes - episode_reward: -195.453 [-319.887, -100.933] - loss: 11.916 - mae: 85.178 - mean_q: -112.475 Interval 6200 (3099500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1598 8 episodes - episode_reward: -194.992 [-244.238, -137.391] - loss: 12.463 - mae: 85.178 - mean_q: -112.478 Interval 6201 (3100000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1399 8 episodes - episode_reward: -199.400 [-284.972, -157.994] - loss: 11.154 - mae: 85.178 - mean_q: -112.518 Interval 6202 (3100500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4773 5 episodes - episode_reward: -429.643 [-1191.287, -154.172] - loss: 14.954 - mae: 85.225 - mean_q: -112.513 Interval 6203 (3101000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6903 7 episodes - episode_reward: -206.199 [-276.487, -141.096] - loss: 13.755 - mae: 85.240 - mean_q: -112.528 Interval 6204 (3101500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0088 7 episodes - episode_reward: -203.907 [-245.466, -116.467] - loss: 9.251 - mae: 85.250 - mean_q: -112.570 Interval 6205 (3102000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0900 8 episodes - episode_reward: -201.384 [-287.997, -131.007] - loss: 11.431 - mae: 85.279 - mean_q: -112.619 Interval 6206 (3102500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8448 8 episodes - episode_reward: -176.489 [-262.796, 39.981] - loss: 12.097 - mae: 85.321 - mean_q: -112.627 Interval 6207 (3103000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8888 7 episodes - episode_reward: -194.622 [-288.125, -150.564] - loss: 10.490 - mae: 85.350 - mean_q: -112.659 Interval 6208 (3103500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0120 8 episodes - episode_reward: -193.152 [-231.255, -164.621] - loss: 8.883 - mae: 85.376 - mean_q: -112.701 Interval 6209 (3104000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.7544 8 episodes - episode_reward: -166.645 [-261.089, -78.646] - loss: 9.686 - mae: 85.405 - mean_q: -112.742 Interval 6210 (3104500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6556 9 episodes - episode_reward: -162.005 [-238.171, -111.283] - loss: 10.239 - mae: 85.417 - mean_q: -112.776 Interval 6211 (3105000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3820 6 episodes - episode_reward: -179.405 [-363.605, 26.990] - loss: 13.712 - mae: 85.472 - mean_q: -112.792 Interval 6212 (3105500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3122 11 episodes - episode_reward: -157.510 [-238.856, -100.000] - loss: 12.287 - mae: 85.458 - mean_q: -112.825 Interval 6213 (3106000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9384 8 episodes - episode_reward: -186.935 [-219.705, -156.555] - loss: 12.828 - mae: 85.479 - mean_q: -112.829 Interval 6214 (3106500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3337 8 episodes - episode_reward: -203.489 [-297.673, -100.000] - loss: 11.139 - mae: 85.494 - mean_q: -112.843 Interval 6215 (3107000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5576 10 episodes - episode_reward: -170.275 [-348.932, -100.000] - loss: 10.020 - mae: 85.514 - mean_q: -112.873 Interval 6216 (3107500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8086 8 episodes - episode_reward: -191.158 [-227.691, -140.964] - loss: 13.995 - mae: 85.554 - mean_q: -112.868 Interval 6217 (3108000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0270 7 episodes - episode_reward: -215.511 [-331.937, -150.867] - loss: 11.720 - mae: 85.582 - mean_q: -112.884 Interval 6218 (3108500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8700 7 episodes - episode_reward: -197.432 [-296.257, -124.022] - loss: 10.773 - mae: 85.599 - mean_q: -112.879 Interval 6219 (3109000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1554 8 episodes - episode_reward: -141.255 [-183.610, -70.805] - loss: 11.026 - mae: 85.645 - mean_q: -112.915 Interval 6220 (3109500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5489 7 episodes - episode_reward: -176.905 [-232.133, -130.893] - loss: 12.685 - mae: 85.674 - mean_q: -112.952 Interval 6221 (3110000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6262 7 episodes - episode_reward: -179.900 [-230.829, -100.000] - loss: 14.516 - mae: 85.721 - mean_q: -112.954 Interval 6222 (3110500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9519 9 episodes - episode_reward: -172.625 [-229.318, -99.765] - loss: 14.774 - mae: 85.732 - mean_q: -112.932 Interval 6223 (3111000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8118 8 episodes - episode_reward: -168.139 [-203.404, -100.000] - loss: 11.467 - mae: 85.725 - mean_q: -112.926 Interval 6224 (3111500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3525 9 episodes - episode_reward: -191.138 [-247.385, -152.922] - loss: 10.668 - mae: 85.729 - mean_q: -112.919 Interval 6225 (3112000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9361 7 episodes - episode_reward: -207.895 [-297.845, -164.199] - loss: 13.614 - mae: 85.754 - mean_q: -112.906 Interval 6226 (3112500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2287 9 episodes - episode_reward: -182.938 [-249.408, -100.000] - loss: 13.841 - mae: 85.761 - mean_q: -112.885 Interval 6227 (3113000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4809 8 episodes - episode_reward: -219.838 [-419.948, -142.661] - loss: 10.891 - mae: 85.750 - mean_q: -112.873 Interval 6228 (3113500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4946 7 episodes - episode_reward: -170.806 [-223.754, -116.594] - loss: 12.805 - mae: 85.776 - mean_q: -112.861 Interval 6229 (3114000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4707 7 episodes - episode_reward: -179.073 [-236.476, -139.157] - loss: 11.988 - mae: 85.789 - mean_q: -112.874 Interval 6230 (3114500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1514 8 episodes - episode_reward: -191.858 [-252.398, -109.810] - loss: 16.329 - mae: 85.817 - mean_q: -112.857 Interval 6231 (3115000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9382 8 episodes - episode_reward: -188.590 [-233.164, -100.000] - loss: 12.660 - mae: 85.813 - mean_q: -112.831 Interval 6232 (3115500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8340 7 episodes - episode_reward: -191.160 [-215.588, -156.101] - loss: 12.341 - mae: 85.803 - mean_q: -112.822 Interval 6233 (3116000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9892 8 episodes - episode_reward: -200.395 [-316.350, -113.170] - loss: 13.928 - mae: 85.809 - mean_q: -112.828 Interval 6234 (3116500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1680 7 episodes - episode_reward: -151.548 [-210.923, -52.216] - loss: 12.431 - mae: 85.803 - mean_q: -112.805 Interval 6235 (3117000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8587 7 episodes - episode_reward: -201.472 [-301.125, -134.444] - loss: 10.702 - mae: 85.781 - mean_q: -112.822 Interval 6236 (3117500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7067 9 episodes - episode_reward: -152.228 [-269.799, -26.139] - loss: 14.219 - mae: 85.799 - mean_q: -112.822 Interval 6237 (3118000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3164 7 episodes - episode_reward: -164.722 [-257.083, -44.902] - loss: 13.976 - mae: 85.802 - mean_q: -112.836 Interval 6238 (3118500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6615 7 episodes - episode_reward: -190.860 [-316.291, -97.165] - loss: 11.791 - mae: 85.801 - mean_q: -112.835 Interval 6239 (3119000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3012 6 episodes - episode_reward: -188.747 [-274.555, -48.570] - loss: 12.150 - mae: 85.801 - mean_q: -112.848 Interval 6240 (3119500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6398 7 episodes - episode_reward: -179.698 [-219.028, -140.852] - loss: 11.897 - mae: 85.799 - mean_q: -112.869 Interval 6241 (3120000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1158 6 episodes - episode_reward: -183.879 [-373.880, -4.685] - loss: 16.350 - mae: 85.824 - mean_q: -112.835 Interval 6242 (3120500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1339 8 episodes - episode_reward: -136.609 [-189.689, -40.132] - loss: 11.383 - mae: 85.785 - mean_q: -112.822 Interval 6243 (3121000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6698 6 episodes - episode_reward: -215.649 [-271.727, -174.853] - loss: 12.000 - mae: 85.800 - mean_q: -112.809 Interval 6244 (3121500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1025 9 episodes - episode_reward: -182.151 [-238.260, -100.000] - loss: 12.826 - mae: 85.802 - mean_q: -112.798 Interval 6245 (3122000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7974 7 episodes - episode_reward: -194.020 [-345.959, -54.780] - loss: 11.817 - mae: 85.803 - mean_q: -112.796 Interval 6246 (3122500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8749 9 episodes - episode_reward: -158.991 [-190.090, -99.837] - loss: 14.462 - mae: 85.790 - mean_q: -112.811 Interval 6247 (3123000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6680 7 episodes - episode_reward: -188.487 [-294.709, -147.310] - loss: 11.158 - mae: 85.758 - mean_q: -112.817 Interval 6248 (3123500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8209 9 episodes - episode_reward: -162.870 [-212.643, -118.691] - loss: 15.399 - mae: 85.782 - mean_q: -112.822 Interval 6249 (3124000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2207 6 episodes - episode_reward: -182.708 [-322.985, -96.152] - loss: 13.088 - mae: 85.749 - mean_q: -112.788 Interval 6250 (3124500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8557 8 episodes - episode_reward: -172.541 [-313.126, -100.000] - loss: 10.061 - mae: 85.727 - mean_q: -112.798 Interval 6251 (3125000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2081 8 episodes - episode_reward: -202.057 [-251.729, -163.899] - loss: 10.175 - mae: 85.722 - mean_q: -112.824 Interval 6252 (3125500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9894 9 episodes - episode_reward: -165.933 [-241.814, -126.682] - loss: 11.690 - mae: 85.731 - mean_q: -112.860 Interval 6253 (3126000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1910 9 episodes - episode_reward: -182.008 [-229.826, -49.052] - loss: 13.846 - mae: 85.741 - mean_q: -112.851 Interval 6254 (3126500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2335 8 episodes - episode_reward: -190.505 [-282.075, -100.000] - loss: 14.267 - mae: 85.756 - mean_q: -112.847 Interval 6255 (3127000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2648 10 episodes - episode_reward: -168.952 [-314.780, -100.000] - loss: 14.687 - mae: 85.777 - mean_q: -112.821 Interval 6256 (3127500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0004 8 episodes - episode_reward: -169.383 [-263.681, -100.000] - loss: 12.466 - mae: 85.760 - mean_q: -112.805 Interval 6257 (3128000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7965 7 episodes - episode_reward: -227.044 [-319.847, -132.212] - loss: 13.604 - mae: 85.775 - mean_q: -112.812 Interval 6258 (3128500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6276 6 episodes - episode_reward: -191.039 [-288.844, -111.935] - loss: 14.907 - mae: 85.738 - mean_q: -112.805 Interval 6259 (3129000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0807 6 episodes - episode_reward: -195.502 [-262.571, -141.430] - loss: 14.117 - mae: 85.700 - mean_q: -112.794 Interval 6260 (3129500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8174 7 episodes - episode_reward: -198.031 [-401.378, -130.277] - loss: 12.471 - mae: 85.667 - mean_q: -112.785 Interval 6261 (3130000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9684 8 episodes - episode_reward: -187.261 [-313.070, -140.255] - loss: 11.359 - mae: 85.633 - mean_q: -112.784 Interval 6262 (3130500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9059 8 episodes - episode_reward: -186.157 [-284.652, -140.201] - loss: 12.237 - mae: 85.604 - mean_q: -112.792 Interval 6263 (3131000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0678 8 episodes - episode_reward: -187.768 [-312.490, -100.000] - loss: 11.928 - mae: 85.558 - mean_q: -112.799 Interval 6264 (3131500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7472 7 episodes - episode_reward: -197.457 [-320.720, -118.682] - loss: 12.221 - mae: 85.503 - mean_q: -112.807 Interval 6265 (3132000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3006 9 episodes - episode_reward: -185.326 [-336.653, -126.974] - loss: 11.085 - mae: 85.445 - mean_q: -112.805 Interval 6266 (3132500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2471 8 episodes - episode_reward: -137.361 [-217.391, 24.642] - loss: 13.721 - mae: 85.408 - mean_q: -112.794 Interval 6267 (3133000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1506 6 episodes - episode_reward: -261.339 [-451.222, -77.504] - loss: 11.829 - mae: 85.361 - mean_q: -112.775 Interval 6268 (3133500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.2418 5 episodes - episode_reward: -593.026 [-784.490, -100.000] - loss: 12.572 - mae: 85.315 - mean_q: -112.718 Interval 6269 (3134000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.4970 7 episodes - episode_reward: -416.461 [-874.678, -100.000] - loss: 13.003 - mae: 85.294 - mean_q: -112.698 Interval 6270 (3134500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8147 6 episodes - episode_reward: -143.315 [-218.028, 34.477] - loss: 14.037 - mae: 85.323 - mean_q: -112.696 Interval 6271 (3135000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9913 7 episodes - episode_reward: -153.041 [-236.345, 30.249] - loss: 10.019 - mae: 85.304 - mean_q: -112.677 Interval 6272 (3135500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2324 9 episodes - episode_reward: -167.131 [-212.699, -110.088] - loss: 12.350 - mae: 85.317 - mean_q: -112.706 Interval 6273 (3136000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3644 9 episodes - episode_reward: -148.929 [-208.709, -62.195] - loss: 11.549 - mae: 85.313 - mean_q: -112.709 Interval 6274 (3136500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5120 6 episodes - episode_reward: -179.193 [-259.524, -132.480] - loss: 12.067 - mae: 85.325 - mean_q: -112.710 Interval 6275 (3137000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9836 7 episodes - episode_reward: -226.868 [-300.435, -152.501] - loss: 13.350 - mae: 85.332 - mean_q: -112.684 Interval 6276 (3137500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4633 8 episodes - episode_reward: -150.573 [-186.728, -100.000] - loss: 14.345 - mae: 85.335 - mean_q: -112.661 Interval 6277 (3138000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0209 8 episodes - episode_reward: -194.795 [-297.463, -113.278] - loss: 12.627 - mae: 85.334 - mean_q: -112.627 Interval 6278 (3138500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8393 8 episodes - episode_reward: -175.049 [-223.398, -67.189] - loss: 13.851 - mae: 85.329 - mean_q: -112.605 Interval 6279 (3139000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4703 7 episodes - episode_reward: -176.546 [-216.553, -145.281] - loss: 10.549 - mae: 85.306 - mean_q: -112.556 Interval 6280 (3139500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0202 9 episodes - episode_reward: -166.844 [-253.957, -98.949] - loss: 14.295 - mae: 85.308 - mean_q: -112.531 Interval 6281 (3140000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0851 8 episodes - episode_reward: -193.057 [-303.194, -127.819] - loss: 10.020 - mae: 85.270 - mean_q: -112.513 Interval 6282 (3140500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1417 9 episodes - episode_reward: -177.480 [-279.248, -107.538] - loss: 12.551 - mae: 85.274 - mean_q: -112.491 Interval 6283 (3141000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1007 6 episodes - episode_reward: -157.387 [-236.609, -2.197] - loss: 11.517 - mae: 85.264 - mean_q: -112.469 Interval 6284 (3141500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6609 9 episodes - episode_reward: -155.036 [-246.023, -31.915] - loss: 11.787 - mae: 85.251 - mean_q: -112.485 Interval 6285 (3142000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8393 8 episodes - episode_reward: -180.994 [-214.625, -134.768] - loss: 11.619 - mae: 85.266 - mean_q: -112.451 Interval 6286 (3142500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3839 9 episodes - episode_reward: -191.280 [-255.294, -135.109] - loss: 7.885 - mae: 85.249 - mean_q: -112.452 Interval 6287 (3143000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5583 8 episodes - episode_reward: -162.609 [-222.928, -100.000] - loss: 10.597 - mae: 85.246 - mean_q: -112.476 Interval 6288 (3143500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3823 6 episodes - episode_reward: -199.556 [-280.416, -147.052] - loss: 12.909 - mae: 85.250 - mean_q: -112.464 Interval 6289 (3144000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4271 10 episodes - episode_reward: -165.094 [-234.264, -76.705] - loss: 11.233 - mae: 85.224 - mean_q: -112.457 Interval 6290 (3144500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5870 9 episodes - episode_reward: -143.249 [-197.595, -100.000] - loss: 11.429 - mae: 85.194 - mean_q: -112.468 Interval 6291 (3145000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5833 7 episodes - episode_reward: -177.683 [-218.710, -147.659] - loss: 11.334 - mae: 85.196 - mean_q: -112.479 Interval 6292 (3145500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8022 7 episodes - episode_reward: -196.259 [-217.325, -112.082] - loss: 12.148 - mae: 85.186 - mean_q: -112.453 Interval 6293 (3146000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3318 6 episodes - episode_reward: -209.211 [-294.803, -148.491] - loss: 8.972 - mae: 85.162 - mean_q: -112.444 Interval 6294 (3146500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5986 8 episodes - episode_reward: -164.119 [-262.749, -110.243] - loss: 9.467 - mae: 85.165 - mean_q: -112.477 Interval 6295 (3147000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0007 8 episodes - episode_reward: -186.424 [-316.976, -128.734] - loss: 10.785 - mae: 85.174 - mean_q: -112.486 Interval 6296 (3147500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1453 7 episodes - episode_reward: -146.693 [-213.273, -47.487] - loss: 13.229 - mae: 85.204 - mean_q: -112.444 Interval 6297 (3148000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2647 7 episodes - episode_reward: -233.744 [-301.730, -179.854] - loss: 12.731 - mae: 85.202 - mean_q: -112.423 Interval 6298 (3148500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1477 9 episodes - episode_reward: -181.774 [-266.135, -121.052] - loss: 9.798 - mae: 85.167 - mean_q: -112.412 Interval 6299 (3149000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7840 7 episodes - episode_reward: -196.325 [-281.867, -158.086] - loss: 10.477 - mae: 85.171 - mean_q: -112.422 Interval 6300 (3149500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9387 7 episodes - episode_reward: -192.037 [-283.064, -100.000] - loss: 8.814 - mae: 85.172 - mean_q: -112.435 Interval 6301 (3150000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6330 7 episodes - episode_reward: -197.738 [-249.758, -160.753] - loss: 11.074 - mae: 85.174 - mean_q: -112.458 Interval 6302 (3150500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8719 6 episodes - episode_reward: -158.399 [-208.461, 7.396] - loss: 11.391 - mae: 85.154 - mean_q: -112.436 Interval 6303 (3151000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4164 7 episodes - episode_reward: -176.975 [-258.647, -67.545] - loss: 14.509 - mae: 85.152 - mean_q: -112.393 Interval 6304 (3151500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7152 8 episodes - episode_reward: -164.069 [-201.534, -139.598] - loss: 13.823 - mae: 85.130 - mean_q: -112.371 Interval 6305 (3152000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9235 6 episodes - episode_reward: -167.236 [-272.688, -85.824] - loss: 9.981 - mae: 85.101 - mean_q: -112.361 Interval 6306 (3152500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4811 7 episodes - episode_reward: -181.117 [-309.400, 20.194] - loss: 12.922 - mae: 85.098 - mean_q: -112.332 Interval 6307 (3153000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4563 7 episodes - episode_reward: -175.145 [-247.109, -100.000] - loss: 15.091 - mae: 85.094 - mean_q: -112.300 Interval 6308 (3153500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.8951 10 episodes - episode_reward: -186.203 [-281.081, -111.982] - loss: 10.161 - mae: 85.057 - mean_q: -112.268 Interval 6309 (3154000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0614 8 episodes - episode_reward: -204.209 [-305.264, -137.593] - loss: 11.372 - mae: 85.036 - mean_q: -112.255 Interval 6310 (3154500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2744 8 episodes - episode_reward: -203.805 [-288.400, -100.000] - loss: 11.242 - mae: 85.020 - mean_q: -112.235 Interval 6311 (3155000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4808 8 episodes - episode_reward: -145.260 [-208.357, 32.398] - loss: 9.504 - mae: 84.990 - mean_q: -112.247 Interval 6312 (3155500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5844 8 episodes - episode_reward: -164.022 [-264.225, 43.181] - loss: 13.867 - mae: 85.016 - mean_q: -112.230 Interval 6313 (3156000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0245 9 episodes - episode_reward: -173.356 [-228.586, -116.955] - loss: 9.495 - mae: 84.978 - mean_q: -112.202 Interval 6314 (3156500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4225 7 episodes - episode_reward: -170.755 [-228.562, -76.413] - loss: 12.861 - mae: 84.958 - mean_q: -112.191 Interval 6315 (3157000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1512 7 episodes - episode_reward: -230.952 [-296.960, -148.175] - loss: 9.912 - mae: 84.913 - mean_q: -112.166 Interval 6316 (3157500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3232 6 episodes - episode_reward: -181.609 [-192.410, -173.989] - loss: 10.365 - mae: 84.904 - mean_q: -112.159 Interval 6317 (3158000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9885 8 episodes - episode_reward: -191.426 [-272.118, -69.724] - loss: 11.004 - mae: 84.895 - mean_q: -112.160 Interval 6318 (3158500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6382 8 episodes - episode_reward: -152.801 [-224.383, -60.691] - loss: 12.767 - mae: 84.900 - mean_q: -112.138 Interval 6319 (3159000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3705 8 episodes - episode_reward: -159.956 [-242.085, -44.712] - loss: 11.990 - mae: 84.886 - mean_q: -112.110 Interval 6320 (3159500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7291 8 episodes - episode_reward: -160.959 [-207.588, -131.706] - loss: 10.845 - mae: 84.862 - mean_q: -112.102 Interval 6321 (3160000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0106 8 episodes - episode_reward: -197.878 [-308.322, -146.237] - loss: 12.606 - mae: 84.853 - mean_q: -112.076 Interval 6322 (3160500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8086 7 episodes - episode_reward: -197.148 [-239.712, -78.139] - loss: 11.644 - mae: 84.817 - mean_q: -112.044 Interval 6323 (3161000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2130 6 episodes - episode_reward: -193.052 [-233.931, -146.346] - loss: 9.717 - mae: 84.795 - mean_q: -112.022 Interval 6324 (3161500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5683 6 episodes - episode_reward: -212.300 [-347.767, -145.344] - loss: 9.716 - mae: 84.761 - mean_q: -112.018 Interval 6325 (3162000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0349 9 episodes - episode_reward: -164.916 [-238.009, -100.000] - loss: 13.481 - mae: 84.767 - mean_q: -111.988 Interval 6326 (3162500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1606 8 episodes - episode_reward: -200.626 [-326.043, -149.995] - loss: 11.892 - mae: 84.744 - mean_q: -111.965 Interval 6327 (3163000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8349 7 episodes - episode_reward: -192.818 [-229.347, -100.000] - loss: 8.701 - mae: 84.706 - mean_q: -111.949 Interval 6328 (3163500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8455 8 episodes - episode_reward: -181.875 [-317.514, -120.446] - loss: 13.175 - mae: 84.710 - mean_q: -111.940 Interval 6329 (3164000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4250 9 episodes - episode_reward: -184.701 [-229.897, -134.683] - loss: 13.628 - mae: 84.692 - mean_q: -111.909 Interval 6330 (3164500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7836 8 episodes - episode_reward: -181.177 [-243.539, -94.818] - loss: 7.541 - mae: 84.667 - mean_q: -111.898 Interval 6331 (3165000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0932 6 episodes - episode_reward: -177.204 [-201.793, -122.013] - loss: 12.185 - mae: 84.676 - mean_q: -111.904 Interval 6332 (3165500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5596 6 episodes - episode_reward: -210.812 [-316.447, -100.000] - loss: 11.049 - mae: 84.653 - mean_q: -111.890 Interval 6333 (3166000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8459 8 episodes - episode_reward: -183.655 [-241.309, -140.813] - loss: 14.801 - mae: 84.646 - mean_q: -111.843 Interval 6334 (3166500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3265 7 episodes - episode_reward: -165.097 [-254.740, -100.000] - loss: 12.109 - mae: 84.629 - mean_q: -111.798 Interval 6335 (3167000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7124 5 episodes - episode_reward: -252.503 [-419.705, -195.085] - loss: 12.476 - mae: 84.634 - mean_q: -111.768 Interval 6336 (3167500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2527 10 episodes - episode_reward: -170.018 [-246.319, -100.000] - loss: 11.657 - mae: 84.613 - mean_q: -111.747 Interval 6337 (3168000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5720 8 episodes - episode_reward: -158.056 [-203.027, -136.966] - loss: 11.123 - mae: 84.602 - mean_q: -111.740 Interval 6338 (3168500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7324 7 episodes - episode_reward: -192.003 [-286.410, -104.427] - loss: 14.237 - mae: 84.592 - mean_q: -111.720 Interval 6339 (3169000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7398 8 episodes - episode_reward: -175.664 [-251.120, -66.494] - loss: 9.479 - mae: 84.557 - mean_q: -111.677 Interval 6340 (3169500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3467 8 episodes - episode_reward: -140.731 [-250.864, 31.626] - loss: 8.222 - mae: 84.528 - mean_q: -111.689 Interval 6341 (3170000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8109 7 episodes - episode_reward: -201.070 [-358.474, -133.896] - loss: 9.423 - mae: 84.530 - mean_q: -111.692 Interval 6342 (3170500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6297 8 episodes - episode_reward: -160.949 [-250.077, -36.739] - loss: 11.236 - mae: 84.542 - mean_q: -111.696 Interval 6343 (3171000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7746 9 episodes - episode_reward: -162.191 [-212.533, -104.636] - loss: 10.238 - mae: 84.530 - mean_q: -111.684 Interval 6344 (3171500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7557 7 episodes - episode_reward: -203.211 [-255.132, -150.844] - loss: 10.520 - mae: 84.514 - mean_q: -111.676 Interval 6345 (3172000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2361 6 episodes - episode_reward: -178.404 [-255.514, -22.290] - loss: 9.436 - mae: 84.512 - mean_q: -111.686 Interval 6346 (3172500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3990 7 episodes - episode_reward: -178.235 [-242.718, -59.998] - loss: 10.926 - mae: 84.503 - mean_q: -111.690 Interval 6347 (3173000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6627 7 episodes - episode_reward: -180.147 [-245.209, -118.575] - loss: 13.771 - mae: 84.506 - mean_q: -111.659 Interval 6348 (3173500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1854 7 episodes - episode_reward: -218.681 [-278.101, -181.434] - loss: 12.468 - mae: 84.478 - mean_q: -111.645 Interval 6349 (3174000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3270 6 episodes - episode_reward: -281.564 [-423.015, -169.499] - loss: 11.925 - mae: 84.469 - mean_q: -111.637 Interval 6350 (3174500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9553 7 episodes - episode_reward: -214.962 [-352.899, -138.301] - loss: 10.242 - mae: 84.461 - mean_q: -111.645 Interval 6351 (3175000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3881 6 episodes - episode_reward: -283.871 [-410.072, -210.496] - loss: 9.560 - mae: 84.460 - mean_q: -111.652 Interval 6352 (3175500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4725 7 episodes - episode_reward: -181.607 [-249.325, -132.546] - loss: 13.133 - mae: 84.463 - mean_q: -111.649 Interval 6353 (3176000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4093 6 episodes - episode_reward: -217.100 [-336.107, -144.609] - loss: 9.927 - mae: 84.446 - mean_q: -111.633 Interval 6354 (3176500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.8038 9 episodes - episode_reward: -419.648 [-774.497, -121.683] - loss: 12.540 - mae: 84.451 - mean_q: -111.640 Interval 6355 (3177000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1630 6 episodes - episode_reward: -181.604 [-240.934, -83.761] - loss: 11.182 - mae: 84.457 - mean_q: -111.654 Interval 6356 (3177500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8364 7 episodes - episode_reward: -199.507 [-239.045, -156.363] - loss: 13.910 - mae: 84.495 - mean_q: -111.642 Interval 6357 (3178000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4840 8 episodes - episode_reward: -156.793 [-203.960, -33.457] - loss: 15.533 - mae: 84.516 - mean_q: -111.618 Interval 6358 (3178500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8859 7 episodes - episode_reward: -212.060 [-403.716, -84.970] - loss: 15.782 - mae: 84.537 - mean_q: -111.601 Interval 6359 (3179000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8952 6 episodes - episode_reward: -152.247 [-251.747, 54.022] - loss: 10.043 - mae: 84.520 - mean_q: -111.593 Interval 6360 (3179500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9364 8 episodes - episode_reward: -178.516 [-259.975, -140.692] - loss: 12.610 - mae: 84.542 - mean_q: -111.613 Interval 6361 (3180000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0759 7 episodes - episode_reward: -227.449 [-322.948, -173.659] - loss: 12.727 - mae: 84.549 - mean_q: -111.620 Interval 6362 (3180500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2452 8 episodes - episode_reward: -196.526 [-326.392, -130.684] - loss: 13.094 - mae: 84.541 - mean_q: -111.634 Interval 6363 (3181000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6136 8 episodes - episode_reward: -172.552 [-220.792, -135.071] - loss: 7.877 - mae: 84.519 - mean_q: -111.633 Interval 6364 (3181500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3443 6 episodes - episode_reward: -188.992 [-249.737, -130.871] - loss: 12.040 - mae: 84.544 - mean_q: -111.662 Interval 6365 (3182000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0203 8 episodes - episode_reward: -193.087 [-268.558, -155.770] - loss: 11.187 - mae: 84.550 - mean_q: -111.665 Interval 6366 (3182500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0313 7 episodes - episode_reward: -139.758 [-218.062, -65.789] - loss: 9.622 - mae: 84.544 - mean_q: -111.677 Interval 6367 (3183000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3720 7 episodes - episode_reward: -175.241 [-214.254, -134.637] - loss: 10.475 - mae: 84.542 - mean_q: -111.690 Interval 6368 (3183500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1036 5 episodes - episode_reward: -196.714 [-277.004, -131.047] - loss: 11.303 - mae: 84.518 - mean_q: -111.685 Interval 6369 (3184000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2014 9 episodes - episode_reward: -296.085 [-958.061, -100.000] - loss: 12.359 - mae: 84.484 - mean_q: -111.705 Interval 6370 (3184500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4255 8 episodes - episode_reward: -150.933 [-187.066, -122.586] - loss: 12.237 - mae: 84.463 - mean_q: -111.709 Interval 6371 (3185000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7918 8 episodes - episode_reward: -177.573 [-200.375, -129.568] - loss: 11.286 - mae: 84.441 - mean_q: -111.705 Interval 6372 (3185500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2340 9 episodes - episode_reward: -175.260 [-253.692, -100.000] - loss: 12.271 - mae: 84.434 - mean_q: -111.716 Interval 6373 (3186000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3248 7 episodes - episode_reward: -218.545 [-495.847, -100.000] - loss: 11.446 - mae: 84.410 - mean_q: -111.706 Interval 6374 (3186500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3530 6 episodes - episode_reward: -465.852 [-859.251, -100.000] - loss: 9.935 - mae: 84.399 - mean_q: -111.669 Interval 6375 (3187000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6719 6 episodes - episode_reward: -394.358 [-624.182, -174.692] - loss: 10.454 - mae: 84.428 - mean_q: -111.697 Interval 6376 (3187500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4670 6 episodes - episode_reward: -205.604 [-260.939, -151.499] - loss: 8.721 - mae: 84.457 - mean_q: -111.726 Interval 6377 (3188000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3399 11 episodes - episode_reward: -151.342 [-231.039, -62.621] - loss: 10.952 - mae: 84.506 - mean_q: -111.765 Interval 6378 (3188500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4260 9 episodes - episode_reward: -189.344 [-301.980, -141.252] - loss: 10.056 - mae: 84.545 - mean_q: -111.805 Interval 6379 (3189000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7940 9 episodes - episode_reward: -152.269 [-224.555, -91.983] - loss: 10.534 - mae: 84.598 - mean_q: -111.843 Interval 6380 (3189500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0222 9 episodes - episode_reward: -168.378 [-275.367, 25.320] - loss: 15.293 - mae: 84.644 - mean_q: -111.847 Interval 6381 (3190000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9873 8 episodes - episode_reward: -187.885 [-254.322, -137.518] - loss: 11.313 - mae: 84.646 - mean_q: -111.847 Interval 6382 (3190500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9903 9 episodes - episode_reward: -172.143 [-203.136, -111.740] - loss: 11.407 - mae: 84.671 - mean_q: -111.840 Interval 6383 (3191000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6777 7 episodes - episode_reward: -183.179 [-311.078, -124.573] - loss: 9.773 - mae: 84.675 - mean_q: -111.836 Interval 6384 (3191500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8727 9 episodes - episode_reward: -151.244 [-195.350, -100.000] - loss: 11.046 - mae: 84.692 - mean_q: -111.851 Interval 6385 (3192000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5764: 9 episodes - episode_reward: -153.171 [-243.119, -0.872] - loss: 10.752 - mae: 84.727 - mean_q: -111.866 Interval 6386 (3192500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0927 7 episodes - episode_reward: -220.582 [-422.713, -115.946] - loss: 10.000 - mae: 84.727 - mean_q: -111.881 Interval 6387 (3193000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5352 10 episodes - episode_reward: -178.051 [-253.506, -115.255] - loss: 10.218 - mae: 84.734 - mean_q: -111.892 Interval 6388 (3193500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9237 8 episodes - episode_reward: -181.603 [-254.414, -119.457] - loss: 13.821 - mae: 84.777 - mean_q: -111.875 Interval 6389 (3194000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0710 9 episodes - episode_reward: -168.763 [-266.345, -100.000] - loss: 13.113 - mae: 84.764 - mean_q: -111.873 Interval 6390 (3194500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8715 8 episodes - episode_reward: -165.707 [-237.466, -115.826] - loss: 10.014 - mae: 84.766 - mean_q: -111.882 Interval 6391 (3195000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3878 7 episodes - episode_reward: -174.011 [-287.182, -94.166] - loss: 9.791 - mae: 84.776 - mean_q: -111.914 Interval 6392 (3195500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8715 9 episodes - episode_reward: -172.812 [-239.242, -123.310] - loss: 13.380 - mae: 84.792 - mean_q: -111.920 Interval 6393 (3196000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7935 7 episodes - episode_reward: -186.178 [-232.298, -147.696] - loss: 14.737 - mae: 84.797 - mean_q: -111.911 Interval 6394 (3196500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0710 8 episodes - episode_reward: -191.304 [-249.389, -120.767] - loss: 11.738 - mae: 84.787 - mean_q: -111.914 Interval 6395 (3197000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6780 8 episodes - episode_reward: -177.075 [-207.399, -142.369] - loss: 13.256 - mae: 84.809 - mean_q: -111.933 Interval 6396 (3197500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6783 7 episodes - episode_reward: -179.029 [-229.990, -37.756] - loss: 10.292 - mae: 84.792 - mean_q: -111.955 Interval 6397 (3198000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3976 7 episodes - episode_reward: -182.274 [-286.519, -57.714] - loss: 8.207 - mae: 84.797 - mean_q: -111.978 Interval 6398 (3198500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1513 7 episodes - episode_reward: -229.918 [-331.007, -170.347] - loss: 9.161 - mae: 84.800 - mean_q: -112.020 Interval 6399 (3199000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9129 6 episodes - episode_reward: -155.285 [-292.684, 40.684] - loss: 9.652 - mae: 84.823 - mean_q: -112.050 Interval 6400 (3199500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7487 7 episodes - episode_reward: -198.733 [-294.109, -46.643] - loss: 12.352 - mae: 84.876 - mean_q: -112.027 Interval 6401 (3200000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5725 8 episodes - episode_reward: -153.457 [-265.713, -73.547] - loss: 12.201 - mae: 84.857 - mean_q: -112.027 Interval 6402 (3200500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9725 9 episodes - episode_reward: -166.395 [-251.747, -119.475] - loss: 14.979 - mae: 84.861 - mean_q: -112.040 Interval 6403 (3201000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9148 8 episodes - episode_reward: -178.588 [-219.301, -116.281] - loss: 11.795 - mae: 84.859 - mean_q: -112.041 Interval 6404 (3201500 steps performed) 500/500 [==============================] - ETA: 0s - reward: -3.13 - 2s 4ms/step - reward: -3.1203 8 episodes - episode_reward: -195.053 [-267.038, -100.000] - loss: 10.657 - mae: 84.853 - mean_q: -112.082 Interval 6405 (3202000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4415 8 episodes - episode_reward: -212.033 [-273.606, -128.422] - loss: 9.985 - mae: 84.863 - mean_q: -112.120 Interval 6406 (3202500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1602 8 episodes - episode_reward: -205.603 [-296.937, -103.344] - loss: 8.284 - mae: 84.880 - mean_q: -112.177 Interval 6407 (3203000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3808 8 episodes - episode_reward: -214.993 [-316.878, -154.857] - loss: 12.493 - mae: 84.913 - mean_q: -112.213 Interval 6408 (3203500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0438 8 episodes - episode_reward: -189.653 [-241.276, -154.904] - loss: 9.834 - mae: 84.920 - mean_q: -112.216 Interval 6409 (3204000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4009 7 episodes - episode_reward: -167.321 [-242.063, -66.339] - loss: 11.589 - mae: 84.947 - mean_q: -112.236 Interval 6410 (3204500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0531 9 episodes - episode_reward: -167.002 [-211.424, -112.644] - loss: 11.622 - mae: 84.939 - mean_q: -112.262 Interval 6411 (3205000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9597 8 episodes - episode_reward: -186.239 [-229.953, -138.113] - loss: 10.180 - mae: 84.970 - mean_q: -112.278 Interval 6412 (3205500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8682 8 episodes - episode_reward: -187.058 [-291.429, -30.280] - loss: 12.861 - mae: 84.996 - mean_q: -112.294 Interval 6413 (3206000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3693 6 episodes - episode_reward: -179.963 [-270.698, -79.605] - loss: 9.696 - mae: 84.997 - mean_q: -112.318 Interval 6414 (3206500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9849 9 episodes - episode_reward: -175.852 [-227.047, -100.000] - loss: 10.550 - mae: 85.009 - mean_q: -112.361 Interval 6415 (3207000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3779 9 episodes - episode_reward: -187.566 [-252.755, -100.000] - loss: 9.794 - mae: 85.013 - mean_q: -112.393 Interval 6416 (3207500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7310 8 episodes - episode_reward: -170.663 [-216.846, -132.368] - loss: 9.442 - mae: 85.042 - mean_q: -112.433 Interval 6417 (3208000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2987 7 episodes - episode_reward: -159.459 [-175.314, -135.679] - loss: 9.872 - mae: 85.070 - mean_q: -112.465 Interval 6418 (3208500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4728 9 episodes - episode_reward: -142.781 [-252.288, 28.032] - loss: 10.220 - mae: 85.077 - mean_q: -112.481 Interval 6419 (3209000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1106 6 episodes - episode_reward: -163.072 [-275.340, -82.448] - loss: 11.225 - mae: 85.079 - mean_q: -112.521 Interval 6420 (3209500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0420 7 episodes - episode_reward: -223.018 [-341.136, -85.551] - loss: 10.234 - mae: 85.108 - mean_q: -112.505 Interval 6421 (3210000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9550 9 episodes - episode_reward: -163.779 [-206.945, -113.944] - loss: 9.754 - mae: 85.126 - mean_q: -112.504 Interval 6422 (3210500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3626 7 episodes - episode_reward: -239.579 [-448.153, -115.339] - loss: 11.056 - mae: 85.147 - mean_q: -112.505 Interval 6423 (3211000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9824 9 episodes - episode_reward: -166.110 [-247.495, -110.576] - loss: 12.950 - mae: 85.165 - mean_q: -112.513 Interval 6424 (3211500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8617 8 episodes - episode_reward: -180.265 [-230.622, -146.722] - loss: 14.128 - mae: 85.181 - mean_q: -112.503 Interval 6425 (3212000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1948 7 episodes - episode_reward: -158.128 [-206.171, -100.000] - loss: 14.867 - mae: 85.198 - mean_q: -112.505 Interval 6426 (3212500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0724 8 episodes - episode_reward: -191.919 [-251.128, -145.243] - loss: 13.835 - mae: 85.214 - mean_q: -112.485 Interval 6427 (3213000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5393 7 episodes - episode_reward: -181.110 [-248.963, -141.534] - loss: 11.865 - mae: 85.203 - mean_q: -112.468 Interval 6428 (3213500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6882 6 episodes - episode_reward: -207.234 [-259.477, -170.831] - loss: 9.285 - mae: 85.193 - mean_q: -112.473 Interval 6429 (3214000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5140 10 episodes - episode_reward: -187.400 [-247.772, -143.936] - loss: 12.527 - mae: 85.219 - mean_q: -112.493 Interval 6430 (3214500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4286 7 episodes - episode_reward: -169.196 [-297.150, -40.643] - loss: 13.924 - mae: 85.226 - mean_q: -112.475 Interval 6431 (3215000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3871 9 episodes - episode_reward: -189.708 [-260.839, -120.775] - loss: 11.414 - mae: 85.222 - mean_q: -112.493 Interval 6432 (3215500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8625 8 episodes - episode_reward: -181.671 [-230.712, -112.572] - loss: 9.085 - mae: 85.215 - mean_q: -112.515 Interval 6433 (3216000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5929 9 episodes - episode_reward: -142.127 [-207.697, 11.534] - loss: 8.608 - mae: 85.223 - mean_q: -112.560 Interval 6434 (3216500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2795 9 episodes - episode_reward: -175.479 [-223.512, -111.056] - loss: 14.515 - mae: 85.277 - mean_q: -112.570 Interval 6435 (3217000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0978 9 episodes - episode_reward: -169.177 [-229.147, -100.000] - loss: 11.142 - mae: 85.253 - mean_q: -112.558 Interval 6436 (3217500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7929 8 episodes - episode_reward: -176.420 [-228.582, -81.045] - loss: 11.005 - mae: 85.268 - mean_q: -112.568 Interval 6437 (3218000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1627 9 episodes - episode_reward: -170.424 [-216.471, -100.000] - loss: 8.709 - mae: 85.269 - mean_q: -112.601 Interval 6438 (3218500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9712 9 episodes - episode_reward: -179.107 [-241.977, -121.244] - loss: 10.244 - mae: 85.295 - mean_q: -112.621 Interval 6439 (3219000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1101: 0s - reward: 8 episodes - episode_reward: -189.677 [-267.542, -103.389] - loss: 9.345 - mae: 85.298 - mean_q: -112.651 Interval 6440 (3219500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0571 9 episodes - episode_reward: -176.718 [-219.640, -126.939] - loss: 10.669 - mae: 85.327 - mean_q: -112.685 Interval 6441 (3220000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6187 7 episodes - episode_reward: -181.046 [-227.235, -157.219] - loss: 11.368 - mae: 85.344 - mean_q: -112.695 Interval 6442 (3220500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3285 9 episodes - episode_reward: -187.831 [-236.557, -137.530] - loss: 13.433 - mae: 85.370 - mean_q: -112.710 Interval 6443 (3221000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1659 7 episodes - episode_reward: -156.023 [-237.074, -76.333] - loss: 13.232 - mae: 85.379 - mean_q: -112.712 Interval 6444 (3221500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8298 7 episodes - episode_reward: -193.278 [-216.672, -172.263] - loss: 12.831 - mae: 85.389 - mean_q: -112.708 Interval 6445 (3222000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3427 8 episodes - episode_reward: -216.087 [-345.639, -140.754] - loss: 10.263 - mae: 85.379 - mean_q: -112.729 Interval 6446 (3222500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5091 7 episodes - episode_reward: -171.998 [-220.615, -114.309] - loss: 9.405 - mae: 85.393 - mean_q: -112.753 Interval 6447 (3223000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5382 6 episodes - episode_reward: -210.598 [-338.229, -162.059] - loss: 10.981 - mae: 85.420 - mean_q: -112.771 Interval 6448 (3223500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2202 8 episodes - episode_reward: -138.475 [-222.532, -1.305] - loss: 7.996 - mae: 85.423 - mean_q: -112.790 Interval 6449 (3224000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6895 7 episodes - episode_reward: -175.480 [-236.362, -110.232] - loss: 9.483 - mae: 85.438 - mean_q: -112.810 Interval 6450 (3224500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0056 8 episodes - episode_reward: -207.136 [-309.024, -100.000] - loss: 7.847 - mae: 85.442 - mean_q: -112.844 Interval 6451 (3225000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1077 6 episodes - episode_reward: -247.332 [-454.836, -175.279] - loss: 8.608 - mae: 85.457 - mean_q: -112.867 Interval 6452 (3225500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1469 10 episodes - episode_reward: -163.464 [-221.672, -124.164] - loss: 9.208 - mae: 85.472 - mean_q: -112.901 Interval 6453 (3226000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3632 6 episodes - episode_reward: -184.507 [-276.632, -2.667] - loss: 10.905 - mae: 85.483 - mean_q: -112.892 Interval 6454 (3226500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1892 8 episodes - episode_reward: -208.010 [-272.691, -142.276] - loss: 11.328 - mae: 85.474 - mean_q: -112.855 Interval 6455 (3227000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0526 8 episodes - episode_reward: -189.002 [-261.031, -162.975] - loss: 10.630 - mae: 85.466 - mean_q: -112.870 Interval 6456 (3227500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6841 8 episodes - episode_reward: -171.817 [-236.777, -107.563] - loss: 10.136 - mae: 85.448 - mean_q: -112.867 Interval 6457 (3228000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2084 6 episodes - episode_reward: -171.691 [-211.574, -109.992] - loss: 9.686 - mae: 85.422 - mean_q: -112.839 Interval 6458 (3228500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9220 8 episodes - episode_reward: -188.001 [-294.009, -122.346] - loss: 8.838 - mae: 85.407 - mean_q: -112.826 Interval 6459 (3229000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5823 8 episodes - episode_reward: -164.732 [-254.891, -91.879] - loss: 8.655 - mae: 85.384 - mean_q: -112.846 Interval 6460 (3229500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4582 7 episodes - episode_reward: -176.252 [-293.624, 50.244] - loss: 11.296 - mae: 85.370 - mean_q: -112.847 Interval 6461 (3230000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7417 8 episodes - episode_reward: -169.487 [-226.944, -68.877] - loss: 12.073 - mae: 85.349 - mean_q: -112.823 Interval 6462 (3230500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5844 9 episodes - episode_reward: -201.998 [-301.903, -131.181] - loss: 10.472 - mae: 85.334 - mean_q: -112.791 Interval 6463 (3231000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5934 6 episodes - episode_reward: -132.908 [-257.483, 47.068] - loss: 8.739 - mae: 85.324 - mean_q: -112.762 Interval 6464 (3231500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2308 7 episodes - episode_reward: -219.750 [-327.947, -126.225] - loss: 11.214 - mae: 85.336 - mean_q: -112.757 Interval 6465 (3232000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2805 9 episodes - episode_reward: -180.626 [-257.314, -100.000] - loss: 9.720 - mae: 85.312 - mean_q: -112.748 Interval 6466 (3232500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8323 10 episodes - episode_reward: -149.156 [-211.365, -8.469] - loss: 11.896 - mae: 85.327 - mean_q: -112.746 Interval 6467 (3233000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2910 9 episodes - episode_reward: -181.150 [-209.119, -155.181] - loss: 8.143 - mae: 85.309 - mean_q: -112.757 Interval 6468 (3233500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6969 8 episodes - episode_reward: -168.284 [-223.821, -100.000] - loss: 11.656 - mae: 85.318 - mean_q: -112.769 Interval 6469 (3234000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2366 7 episodes - episode_reward: -236.811 [-304.265, -144.127] - loss: 9.290 - mae: 85.321 - mean_q: -112.750 Interval 6470 (3234500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4414 7 episodes - episode_reward: -184.777 [-225.607, -100.000] - loss: 12.713 - mae: 85.306 - mean_q: -112.744 Interval 6471 (3235000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0813 8 episodes - episode_reward: -115.581 [-260.998, 19.495] - loss: 13.094 - mae: 85.294 - mean_q: -112.734 Interval 6472 (3235500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0944 8 episodes - episode_reward: -193.554 [-241.023, -162.845] - loss: 9.944 - mae: 85.248 - mean_q: -112.718 Interval 6473 (3236000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0467 7 episodes - episode_reward: -219.794 [-316.251, -147.829] - loss: 8.817 - mae: 85.205 - mean_q: -112.751 Interval 6474 (3236500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5070 7 episodes - episode_reward: -171.733 [-241.152, -117.052] - loss: 9.637 - mae: 85.198 - mean_q: -112.759 Interval 6475 (3237000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8364 8 episodes - episode_reward: -180.949 [-267.897, -100.000] - loss: 11.024 - mae: 85.171 - mean_q: -112.745 Interval 6476 (3237500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4325 8 episodes - episode_reward: -162.371 [-202.546, -105.488] - loss: 11.407 - mae: 85.134 - mean_q: -112.732 Interval 6477 (3238000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8902 8 episodes - episode_reward: -166.463 [-205.459, -116.518] - loss: 11.993 - mae: 85.097 - mean_q: -112.688 Interval 6478 (3238500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4802 8 episodes - episode_reward: -226.914 [-309.572, -173.735] - loss: 12.846 - mae: 85.073 - mean_q: -112.681 Interval 6479 (3239000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -4.9951 5 episodes - episode_reward: -437.161 [-838.081, -111.521] - loss: 13.048 - mae: 85.037 - mean_q: -112.625 Interval 6480 (3239500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8272 5 episodes - episode_reward: -532.371 [-837.672, -300.793] - loss: 12.671 - mae: 85.014 - mean_q: -112.589 Interval 6481 (3240000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6048 7 episodes - episode_reward: -176.946 [-316.732, -38.278] - loss: 12.499 - mae: 85.005 - mean_q: -112.587 Interval 6482 (3240500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3970 6 episodes - episode_reward: -211.456 [-326.303, -136.846] - loss: 11.360 - mae: 85.002 - mean_q: -112.591 Interval 6483 (3241000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2836 6 episodes - episode_reward: -191.731 [-228.804, -119.655] - loss: 8.653 - mae: 85.008 - mean_q: -112.591 Interval 6484 (3241500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9964 7 episodes - episode_reward: -218.711 [-305.572, -147.455] - loss: 10.977 - mae: 85.026 - mean_q: -112.595 Interval 6485 (3242000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4644 6 episodes - episode_reward: -188.805 [-270.778, -108.513] - loss: 11.261 - mae: 85.025 - mean_q: -112.582 Interval 6486 (3242500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4197 10 episodes - episode_reward: -175.630 [-271.091, -125.923] - loss: 11.434 - mae: 85.031 - mean_q: -112.563 Interval 6487 (3243000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9042 8 episodes - episode_reward: -185.041 [-205.974, -148.242] - loss: 12.379 - mae: 85.046 - mean_q: -112.521 Interval 6488 (3243500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.9184 11 episodes - episode_reward: -176.399 [-262.953, -100.000] - loss: 12.090 - mae: 85.046 - mean_q: -112.506 Interval 6489 (3244000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8337 8 episodes - episode_reward: -180.406 [-273.152, -121.535] - loss: 11.396 - mae: 85.048 - mean_q: -112.481 Interval 6490 (3244500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9888 8 episodes - episode_reward: -185.327 [-256.785, -151.882] - loss: 14.118 - mae: 85.047 - mean_q: -112.461 Interval 6491 (3245000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8416 8 episodes - episode_reward: -168.209 [-237.627, -124.585] - loss: 8.220 - mae: 84.988 - mean_q: -112.458 Interval 6492 (3245500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6173 8 episodes - episode_reward: -178.736 [-228.760, -111.369] - loss: 9.902 - mae: 84.985 - mean_q: -112.469 Interval 6493 (3246000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8667 10 episodes - episode_reward: -143.421 [-178.389, -100.000] - loss: 10.535 - mae: 84.990 - mean_q: -112.441 Interval 6494 (3246500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7525 8 episodes - episode_reward: -232.092 [-336.463, -100.000] - loss: 11.629 - mae: 84.993 - mean_q: -112.428 Interval 6495 (3247000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2005 8 episodes - episode_reward: -199.771 [-366.768, -159.799] - loss: 10.896 - mae: 84.975 - mean_q: -112.407 Interval 6496 (3247500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1047 8 episodes - episode_reward: -190.258 [-255.794, -121.423] - loss: 8.552 - mae: 84.959 - mean_q: -112.414 Interval 6497 (3248000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1107 8 episodes - episode_reward: -198.035 [-281.296, -144.916] - loss: 11.057 - mae: 84.984 - mean_q: -112.421 Interval 6498 (3248500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8844 8 episodes - episode_reward: -179.994 [-242.961, -100.000] - loss: 8.688 - mae: 84.986 - mean_q: -112.448 Interval 6499 (3249000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2177 6 episodes - episode_reward: -172.611 [-235.151, -126.976] - loss: 11.903 - mae: 84.994 - mean_q: -112.438 Interval 6500 (3249500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9150 7 episodes - episode_reward: -150.416 [-218.005, 61.156] - loss: 10.009 - mae: 84.984 - mean_q: -112.443 Interval 6501 (3250000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0199 7 episodes - episode_reward: -201.667 [-278.691, -153.184] - loss: 13.732 - mae: 85.012 - mean_q: -112.412 Interval 6502 (3250500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6660 7 episodes - episode_reward: -200.639 [-339.204, -117.268] - loss: 9.807 - mae: 84.982 - mean_q: -112.396 Interval 6503 (3251000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7152 8 episodes - episode_reward: -172.725 [-220.502, -135.646] - loss: 10.353 - mae: 84.973 - mean_q: -112.400 Interval 6504 (3251500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5737 6 episodes - episode_reward: -209.178 [-226.188, -184.325] - loss: 11.557 - mae: 84.962 - mean_q: -112.390 Interval 6505 (3252000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9347 8 episodes - episode_reward: -182.211 [-240.607, -128.231] - loss: 10.162 - mae: 84.965 - mean_q: -112.369 Interval 6506 (3252500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3160 6 episodes - episode_reward: -199.441 [-275.686, -151.747] - loss: 9.796 - mae: 84.953 - mean_q: -112.354 Interval 6507 (3253000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8254 7 episodes - episode_reward: -202.693 [-242.002, -108.777] - loss: 7.314 - mae: 84.936 - mean_q: -112.385 Interval 6508 (3253500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1290 8 episodes - episode_reward: -195.297 [-269.367, -93.220] - loss: 12.206 - mae: 84.959 - mean_q: -112.395 Interval 6509 (3254000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0534 8 episodes - episode_reward: -186.127 [-254.698, -100.000] - loss: 8.528 - mae: 84.940 - mean_q: -112.388 Interval 6510 (3254500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8826 8 episodes - episode_reward: -183.188 [-219.716, -160.823] - loss: 10.008 - mae: 84.955 - mean_q: -112.397 Interval 6511 (3255000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5885 7 episodes - episode_reward: -187.490 [-227.842, -125.471] - loss: 8.729 - mae: 84.956 - mean_q: -112.398 Interval 6512 (3255500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4044 7 episodes - episode_reward: -169.029 [-215.209, -106.925] - loss: 10.497 - mae: 84.969 - mean_q: -112.391 Interval 6513 (3256000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8778 9 episodes - episode_reward: -157.673 [-246.169, -1.879] - loss: 8.254 - mae: 84.955 - mean_q: -112.379 Interval 6514 (3256500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9444 8 episodes - episode_reward: -187.249 [-238.651, -100.000] - loss: 11.264 - mae: 84.938 - mean_q: -112.365 Interval 6515 (3257000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6789 6 episodes - episode_reward: -201.700 [-241.375, -158.923] - loss: 9.833 - mae: 84.926 - mean_q: -112.352 Interval 6516 (3257500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4272 7 episodes - episode_reward: -195.010 [-285.720, -92.031] - loss: 8.713 - mae: 84.904 - mean_q: -112.351 Interval 6517 (3258000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6751 6 episodes - episode_reward: -128.461 [-191.479, -31.566] - loss: 10.307 - mae: 84.896 - mean_q: -112.358 Interval 6518 (3258500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1981 9 episodes - episode_reward: -178.905 [-237.392, -100.000] - loss: 12.193 - mae: 84.905 - mean_q: -112.362 Interval 6519 (3259000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5728 9 episodes - episode_reward: -197.870 [-338.602, -100.000] - loss: 7.844 - mae: 84.886 - mean_q: -112.370 Interval 6520 (3259500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1770 9 episodes - episode_reward: -233.886 [-395.390, -161.382] - loss: 7.186 - mae: 84.892 - mean_q: -112.396 Interval 6521 (3260000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6940 8 episodes - episode_reward: -171.695 [-201.059, -134.094] - loss: 10.764 - mae: 84.921 - mean_q: -112.411 Interval 6522 (3260500 steps performed) 500/500 [==============================] - 27s 54ms/step - reward: -2.8113 8 episodes - episode_reward: -171.899 [-268.061, -17.347] - loss: 9.936 - mae: 84.916 - mean_q: -112.439 Interval 6523 (3261000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9870 8 episodes - episode_reward: -190.477 [-242.403, -151.495] - loss: 7.801 - mae: 84.908 - mean_q: -112.476 Interval 6524 (3261500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4126 9 episodes - episode_reward: -134.462 [-207.623, -12.414] - loss: 11.908 - mae: 84.938 - mean_q: -112.477 Interval 6525 (3262000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6995 8 episodes - episode_reward: -162.238 [-220.631, -66.241] - loss: 10.409 - mae: 84.937 - mean_q: -112.470 Interval 6526 (3262500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3834 8 episodes - episode_reward: -150.347 [-202.765, -33.208] - loss: 7.146 - mae: 84.908 - mean_q: -112.487 Interval 6527 (3263000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.1986 9 episodes - episode_reward: -175.187 [-238.076, -113.653] - loss: 8.470 - mae: 84.920 - mean_q: -112.537 Interval 6528 (3263500 steps performed) 500/500 [==============================] - 320s 641ms/step - reward: -3.1689 8 episodes - episode_reward: -192.408 [-250.095, -100.000] - loss: 9.056 - mae: 84.929 - mean_q: -112.553 Interval 6529 (3264000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6027 7 episodes - episode_reward: -194.566 [-225.221, -158.165] - loss: 8.205 - mae: 84.934 - mean_q: -112.559 Interval 6530 (3264500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5974 8 episodes - episode_reward: -168.695 [-255.355, 22.240] - loss: 12.877 - mae: 84.963 - mean_q: -112.535 Interval 6531 (3265000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2821 5 episodes - episode_reward: -213.483 [-275.698, -163.683] - loss: 8.914 - mae: 84.934 - mean_q: -112.530 Interval 6532 (3265500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1348 10 episodes - episode_reward: -159.662 [-302.709, -100.000] - loss: 8.190 - mae: 84.940 - mean_q: -112.550 Interval 6533 (3266000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.0401 10 episodes - episode_reward: -155.420 [-237.718, -122.677] - loss: 9.225 - mae: 84.936 - mean_q: -112.541 Interval 6534 (3266500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8768 7 episodes - episode_reward: -196.213 [-317.227, -139.257] - loss: 11.259 - mae: 84.951 - mean_q: -112.549 Interval 6535 (3267000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3355 9 episodes - episode_reward: -192.376 [-388.333, -93.924] - loss: 9.590 - mae: 84.951 - mean_q: -112.568 Interval 6536 (3267500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7768 9 episodes - episode_reward: -148.645 [-204.818, -100.000] - loss: 11.054 - mae: 84.955 - mean_q: -112.552 Interval 6537 (3268000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4580 8 episodes - episode_reward: -156.463 [-231.865, -7.862] - loss: 8.454 - mae: 84.948 - mean_q: -112.550 Interval 6538 (3268500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8609 9 episodes - episode_reward: -163.223 [-202.757, -100.000] - loss: 10.582 - mae: 84.950 - mean_q: -112.542 Interval 6539 (3269000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8326 8 episodes - episode_reward: -176.226 [-217.295, -100.000] - loss: 9.014 - mae: 84.965 - mean_q: -112.538 Interval 6540 (3269500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6203 6 episodes - episode_reward: -215.868 [-291.878, -158.590] - loss: 10.983 - mae: 84.966 - mean_q: -112.535 Interval 6541 (3270000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7391 6 episodes - episode_reward: -230.440 [-346.735, -167.617] - loss: 8.284 - mae: 84.953 - mean_q: -112.532 Interval 6542 (3270500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.0227 9 episodes - episode_reward: -169.046 [-226.763, -114.428] - loss: 7.234 - mae: 84.960 - mean_q: -112.570 Interval 6543 (3271000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1766 9 episodes - episode_reward: -175.314 [-256.640, -100.000] - loss: 9.424 - mae: 84.970 - mean_q: -112.579 Interval 6544 (3271500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5818 6 episodes - episode_reward: -205.182 [-269.977, -119.624] - loss: 10.307 - mae: 84.973 - mean_q: -112.605 Interval 6545 (3272000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3971 10 episodes - episode_reward: -175.423 [-239.285, -143.405] - loss: 11.129 - mae: 84.994 - mean_q: -112.616 Interval 6546 (3272500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9174 8 episodes - episode_reward: -180.681 [-218.982, -118.687] - loss: 7.032 - mae: 84.984 - mean_q: -112.602 Interval 6547 (3273000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1959 6 episodes - episode_reward: -176.469 [-207.951, -127.401] - loss: 7.605 - mae: 84.987 - mean_q: -112.598 Interval 6548 (3273500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7998 7 episodes - episode_reward: -199.074 [-308.164, -125.253] - loss: 11.268 - mae: 85.004 - mean_q: -112.579 Interval 6549 (3274000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4725 10 episodes - episode_reward: -180.792 [-294.478, -107.901] - loss: 8.528 - mae: 85.000 - mean_q: -112.591 Interval 6550 (3274500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0123 6 episodes - episode_reward: -154.545 [-171.092, -121.017] - loss: 11.411 - mae: 85.002 - mean_q: -112.606 Interval 6551 (3275000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.8726 8 episodes - episode_reward: -181.555 [-269.412, -100.000] - loss: 8.963 - mae: 85.001 - mean_q: -112.595 Interval 6552 (3275500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3155 7 episodes - episode_reward: -170.156 [-196.272, -128.299] - loss: 8.458 - mae: 84.998 - mean_q: -112.583 Interval 6553 (3276000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3882 7 episodes - episode_reward: -166.875 [-222.128, -111.479] - loss: 11.739 - mae: 85.002 - mean_q: -112.581 Interval 6554 (3276500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9524 9 episodes - episode_reward: -166.463 [-198.479, -131.396] - loss: 8.395 - mae: 84.992 - mean_q: -112.591 Interval 6555 (3277000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1261 8 episodes - episode_reward: -189.859 [-238.820, -123.352] - loss: 11.247 - mae: 85.009 - mean_q: -112.579 Interval 6556 (3277500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9407 8 episodes - episode_reward: -186.079 [-246.737, -126.986] - loss: 6.322 - mae: 84.973 - mean_q: -112.591 Interval 6557 (3278000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4081 8 episodes - episode_reward: -286.024 [-455.590, -119.702] - loss: 9.201 - mae: 84.969 - mean_q: -112.620 Interval 6558 (3278500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1660 6 episodes - episode_reward: -170.386 [-236.392, -70.519] - loss: 10.871 - mae: 84.995 - mean_q: -112.602 Interval 6559 (3279000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6962 7 episodes - episode_reward: -193.931 [-252.087, -134.695] - loss: 9.277 - mae: 84.988 - mean_q: -112.605 Interval 6560 (3279500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6454 7 episodes - episode_reward: -183.907 [-338.756, -27.570] - loss: 9.247 - mae: 85.002 - mean_q: -112.609 Interval 6561 (3280000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7471 9 episodes - episode_reward: -156.709 [-205.840, -100.000] - loss: 9.735 - mae: 85.026 - mean_q: -112.610 Interval 6562 (3280500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2556 7 episodes - episode_reward: -226.677 [-376.462, -169.740] - loss: 9.964 - mae: 85.038 - mean_q: -112.600 Interval 6563 (3281000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2872 8 episodes - episode_reward: -198.909 [-245.751, -135.653] - loss: 11.613 - mae: 85.046 - mean_q: -112.596 Interval 6564 (3281500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9367 10 episodes - episode_reward: -157.358 [-243.113, -100.000] - loss: 13.062 - mae: 85.066 - mean_q: -112.565 Interval 6565 (3282000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1880 8 episodes - episode_reward: -129.425 [-237.130, 0.739] - loss: 7.736 - mae: 85.053 - mean_q: -112.542 Interval 6566 (3282500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7804 7 episodes - episode_reward: -205.546 [-312.349, -163.752] - loss: 9.393 - mae: 85.058 - mean_q: -112.562 Interval 6567 (3283000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9805 5 episodes - episode_reward: -189.095 [-336.269, -23.059] - loss: 8.210 - mae: 85.067 - mean_q: -112.559 Interval 6568 (3283500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8200 9 episodes - episode_reward: -166.181 [-249.856, -110.254] - loss: 8.966 - mae: 85.068 - mean_q: -112.568 Interval 6569 (3284000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.7031 8 episodes - episode_reward: -167.405 [-252.077, -100.000] - loss: 9.846 - mae: 85.090 - mean_q: -112.563 Interval 6570 (3284500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3930 7 episodes - episode_reward: -171.774 [-215.816, -100.000] - loss: 12.271 - mae: 85.093 - mean_q: -112.527 Interval 6571 (3285000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4829 7 episodes - episode_reward: -160.113 [-218.539, -120.386] - loss: 9.593 - mae: 85.070 - mean_q: -112.500 Interval 6572 (3285500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3887 8 episodes - episode_reward: -162.901 [-268.903, -32.532] - loss: 9.248 - mae: 85.063 - mean_q: -112.494 Interval 6573 (3286000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4511 8 episodes - episode_reward: -151.284 [-199.432, 13.373] - loss: 11.186 - mae: 85.072 - mean_q: -112.475 Interval 6574 (3286500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2021 7 episodes - episode_reward: -151.011 [-275.114, -0.100] - loss: 9.843 - mae: 85.057 - mean_q: -112.461 Interval 6575 (3287000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1513 8 episodes - episode_reward: -199.383 [-242.304, -145.720] - loss: 9.725 - mae: 85.056 - mean_q: -112.458 Interval 6576 (3287500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5067 9 episodes - episode_reward: -195.876 [-334.831, -108.377] - loss: 7.528 - mae: 85.050 - mean_q: -112.457 Interval 6577 (3288000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6376 7 episodes - episode_reward: -180.593 [-300.536, -88.201] - loss: 11.760 - mae: 85.061 - mean_q: -112.434 Interval 6578 (3288500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6337 7 episodes - episode_reward: -193.620 [-314.633, -129.364] - loss: 10.940 - mae: 85.041 - mean_q: -112.419 Interval 6579 (3289000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0439 7 episodes - episode_reward: -200.788 [-393.201, -87.404] - loss: 13.261 - mae: 85.011 - mean_q: -112.384 Interval 6580 (3289500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4349 8 episodes - episode_reward: -168.002 [-312.461, -10.877] - loss: 9.308 - mae: 84.929 - mean_q: -112.382 Interval 6581 (3290000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3139 8 episodes - episode_reward: -147.816 [-234.929, 41.630] - loss: 9.710 - mae: 84.895 - mean_q: -112.386 Interval 6582 (3290500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7424 7 episodes - episode_reward: -183.582 [-214.090, -139.784] - loss: 9.581 - mae: 84.853 - mean_q: -112.390 Interval 6583 (3291000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.6889 6 episodes - episode_reward: -366.381 [-642.453, -100.000] - loss: 8.593 - mae: 84.816 - mean_q: -112.367 Interval 6584 (3291500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0684 4 episodes - episode_reward: -467.443 [-598.364, -300.595] - loss: 11.211 - mae: 84.809 - mean_q: -112.327 Interval 6585 (3292000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7077 7 episodes - episode_reward: -314.772 [-1013.127, -117.864] - loss: 11.516 - mae: 84.809 - mean_q: -112.310 Interval 6586 (3292500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4446 9 episodes - episode_reward: -197.324 [-391.005, -100.000] - loss: 7.442 - mae: 84.809 - mean_q: -112.318 Interval 6587 (3293000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.7532 10 episodes - episode_reward: -186.885 [-271.135, -100.000] - loss: 8.662 - mae: 84.801 - mean_q: -112.315 Interval 6588 (3293500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8418 7 episodes - episode_reward: -202.531 [-309.173, -150.690] - loss: 10.262 - mae: 84.812 - mean_q: -112.318 Interval 6589 (3294000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5925 6 episodes - episode_reward: -215.523 [-309.659, -142.397] - loss: 12.293 - mae: 84.834 - mean_q: -112.299 Interval 6590 (3294500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2492 7 episodes - episode_reward: -157.465 [-185.353, -120.283] - loss: 7.891 - mae: 84.820 - mean_q: -112.312 Interval 6591 (3295000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5457 7 episodes - episode_reward: -177.462 [-217.113, -134.328] - loss: 8.611 - mae: 84.839 - mean_q: -112.316 Interval 6592 (3295500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8913 8 episodes - episode_reward: -183.024 [-226.391, -112.937] - loss: 10.821 - mae: 84.864 - mean_q: -112.326 Interval 6593 (3296000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2457 6 episodes - episode_reward: -183.571 [-280.849, -122.440] - loss: 10.826 - mae: 84.866 - mean_q: -112.301 Interval 6594 (3296500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4341 7 episodes - episode_reward: -178.072 [-241.984, -121.298] - loss: 8.841 - mae: 84.833 - mean_q: -112.260 Interval 6595 (3297000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7658 8 episodes - episode_reward: -174.938 [-246.734, -18.554] - loss: 10.480 - mae: 84.845 - mean_q: -112.239 Interval 6596 (3297500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7784 9 episodes - episode_reward: -151.266 [-220.156, -27.926] - loss: 7.696 - mae: 84.831 - mean_q: -112.212 Interval 6597 (3298000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8062 8 episodes - episode_reward: -179.951 [-238.189, -139.881] - loss: 8.236 - mae: 84.830 - mean_q: -112.191 Interval 6598 (3298500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1452 5 episodes - episode_reward: -216.753 [-308.441, -127.333] - loss: 9.426 - mae: 84.837 - mean_q: -112.183 Interval 6599 (3299000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2879 7 episodes - episode_reward: -157.958 [-220.882, -118.273] - loss: 7.766 - mae: 84.821 - mean_q: -112.165 Interval 6600 (3299500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2355 9 episodes - episode_reward: -164.874 [-241.550, -92.012] - loss: 10.996 - mae: 84.822 - mean_q: -112.163 Interval 6601 (3300000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8856 9 episodes - episode_reward: -179.733 [-286.526, -123.514] - loss: 7.597 - mae: 84.805 - mean_q: -112.152 Interval 6602 (3300500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8863 7 episodes - episode_reward: -191.522 [-271.065, -145.729] - loss: 7.909 - mae: 84.799 - mean_q: -112.154 Interval 6603 (3301000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1704 8 episodes - episode_reward: -199.536 [-262.668, -152.722] - loss: 9.176 - mae: 84.780 - mean_q: -112.157 Interval 6604 (3301500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4928 9 episodes - episode_reward: -198.995 [-367.485, -100.000] - loss: 10.419 - mae: 84.792 - mean_q: -112.138 Interval 6605 (3302000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1190 9 episodes - episode_reward: -177.533 [-249.518, -100.000] - loss: 10.643 - mae: 84.789 - mean_q: -112.113 Interval 6606 (3302500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8597 8 episodes - episode_reward: -168.325 [-256.620, -106.488] - loss: 12.787 - mae: 84.788 - mean_q: -112.080 Interval 6607 (3303000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4488 10 episodes - episode_reward: -179.293 [-278.257, -100.000] - loss: 11.418 - mae: 84.760 - mean_q: -112.063 Interval 6608 (3303500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6258 7 episodes - episode_reward: -185.519 [-298.922, -122.389] - loss: 11.204 - mae: 84.733 - mean_q: -112.038 Interval 6609 (3304000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9369 7 episodes - episode_reward: -207.395 [-238.959, -133.838] - loss: 11.089 - mae: 84.723 - mean_q: -111.992 Interval 6610 (3304500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1447 7 episodes - episode_reward: -226.887 [-338.204, -158.285] - loss: 9.607 - mae: 84.688 - mean_q: -111.966 Interval 6611 (3305000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2603 9 episodes - episode_reward: -179.446 [-321.833, -113.703] - loss: 10.958 - mae: 84.697 - mean_q: -111.951 Interval 6612 (3305500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8414 8 episodes - episode_reward: -177.362 [-256.430, -100.000] - loss: 10.502 - mae: 84.685 - mean_q: -111.951 Interval 6613 (3306000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1813 9 episodes - episode_reward: -177.030 [-275.058, -85.439] - loss: 10.060 - mae: 84.679 - mean_q: -111.956 Interval 6614 (3306500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.7286 10 episodes - episode_reward: -185.075 [-249.928, -113.736] - loss: 8.976 - mae: 84.679 - mean_q: -111.977 Interval 6615 (3307000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9650 8 episodes - episode_reward: -187.028 [-258.120, -113.244] - loss: 13.899 - mae: 84.696 - mean_q: -111.953 Interval 6616 (3307500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3425 7 episodes - episode_reward: -171.435 [-264.992, -42.020] - loss: 9.117 - mae: 84.677 - mean_q: -111.954 Interval 6617 (3308000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1741 6 episodes - episode_reward: -159.202 [-227.129, -122.764] - loss: 10.238 - mae: 84.656 - mean_q: -111.959 Interval 6618 (3308500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6604 8 episodes - episode_reward: -180.484 [-277.390, -100.000] - loss: 9.312 - mae: 84.647 - mean_q: -111.959 Interval 6619 (3309000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5660 8 episodes - episode_reward: -158.695 [-211.991, -120.586] - loss: 8.627 - mae: 84.646 - mean_q: -111.949 Interval 6620 (3309500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1954 8 episodes - episode_reward: -144.899 [-208.704, -22.405] - loss: 8.911 - mae: 84.641 - mean_q: -111.941 Interval 6621 (3310000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4163 8 episodes - episode_reward: -213.521 [-430.017, -100.000] - loss: 10.018 - mae: 84.633 - mean_q: -111.912 Interval 6622 (3310500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5782 8 episodes - episode_reward: -167.065 [-237.261, -108.756] - loss: 13.224 - mae: 84.639 - mean_q: -111.887 Interval 6623 (3311000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2981 6 episodes - episode_reward: -172.891 [-216.608, -108.776] - loss: 12.655 - mae: 84.633 - mean_q: -111.849 Interval 6624 (3311500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0436 9 episodes - episode_reward: -176.619 [-294.664, -100.000] - loss: 12.866 - mae: 84.630 - mean_q: -111.819 Interval 6625 (3312000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5219 6 episodes - episode_reward: -193.805 [-230.057, -133.136] - loss: 9.521 - mae: 84.596 - mean_q: -111.813 Interval 6626 (3312500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8054 9 episodes - episode_reward: -163.086 [-254.900, 37.209] - loss: 14.465 - mae: 84.607 - mean_q: -111.788 Interval 6627 (3313000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4757 6 episodes - episode_reward: -200.867 [-318.975, -148.365] - loss: 14.147 - mae: 84.608 - mean_q: -111.736 Interval 6628 (3313500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8361 7 episodes - episode_reward: -191.446 [-309.420, -125.914] - loss: 10.973 - mae: 84.564 - mean_q: -111.732 Interval 6629 (3314000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7755 9 episodes - episode_reward: -164.248 [-267.316, 17.442] - loss: 8.372 - mae: 84.535 - mean_q: -111.720 Interval 6630 (3314500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6046 7 episodes - episode_reward: -182.963 [-283.505, -89.804] - loss: 8.357 - mae: 84.513 - mean_q: -111.721 Interval 6631 (3315000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3486 6 episodes - episode_reward: -194.650 [-277.372, -150.246] - loss: 10.736 - mae: 84.512 - mean_q: -111.709 Interval 6632 (3315500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6384 7 episodes - episode_reward: -190.642 [-253.901, -136.614] - loss: 8.362 - mae: 84.472 - mean_q: -111.688 Interval 6633 (3316000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1340 8 episodes - episode_reward: -200.178 [-265.814, -109.933] - loss: 8.037 - mae: 84.472 - mean_q: -111.710 Interval 6634 (3316500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1113 8 episodes - episode_reward: -185.194 [-317.331, -100.000] - loss: 8.557 - mae: 84.470 - mean_q: -111.723 Interval 6635 (3317000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5335 6 episodes - episode_reward: -224.332 [-358.236, -175.099] - loss: 14.717 - mae: 84.490 - mean_q: -111.690 Interval 6636 (3317500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0078 7 episodes - episode_reward: -139.023 [-249.517, 38.900] - loss: 8.731 - mae: 84.450 - mean_q: -111.675 Interval 6637 (3318000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9404 8 episodes - episode_reward: -180.999 [-250.353, -146.536] - loss: 9.562 - mae: 84.454 - mean_q: -111.656 Interval 6638 (3318500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0635 8 episodes - episode_reward: -198.102 [-238.466, -146.303] - loss: 11.565 - mae: 84.438 - mean_q: -111.655 Interval 6639 (3319000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6792 8 episodes - episode_reward: -168.549 [-227.055, -31.719] - loss: 8.659 - mae: 84.423 - mean_q: -111.639 Interval 6640 (3319500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8328 8 episodes - episode_reward: -170.162 [-281.022, -108.442] - loss: 9.848 - mae: 84.404 - mean_q: -111.628 Interval 6641 (3320000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7963 9 episodes - episode_reward: -162.863 [-222.400, -113.689] - loss: 8.073 - mae: 84.384 - mean_q: -111.633 Interval 6642 (3320500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9723 8 episodes - episode_reward: -179.068 [-272.378, -138.151] - loss: 11.319 - mae: 84.405 - mean_q: -111.625 Interval 6643 (3321000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2750 8 episodes - episode_reward: -208.123 [-264.579, -139.254] - loss: 9.493 - mae: 84.389 - mean_q: -111.613 Interval 6644 (3321500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7867 7 episodes - episode_reward: -177.406 [-269.127, -132.315] - loss: 10.101 - mae: 84.392 - mean_q: -111.619 Interval 6645 (3322000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3039 8 episodes - episode_reward: -165.384 [-298.306, -102.368] - loss: 9.911 - mae: 84.382 - mean_q: -111.613 Interval 6646 (3322500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9390 8 episodes - episode_reward: -176.083 [-245.663, -100.000] - loss: 8.982 - mae: 84.378 - mean_q: -111.598 Interval 6647 (3323000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2487 8 episodes - episode_reward: -203.291 [-230.181, -157.844] - loss: 12.625 - mae: 84.389 - mean_q: -111.593 Interval 6648 (3323500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1080 8 episodes - episode_reward: -192.496 [-248.946, -133.535] - loss: 7.520 - mae: 84.360 - mean_q: -111.574 Interval 6649 (3324000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9280 7 episodes - episode_reward: -212.942 [-296.126, -141.331] - loss: 8.136 - mae: 84.358 - mean_q: -111.580 Interval 6650 (3324500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7658 8 episodes - episode_reward: -170.387 [-242.707, -114.821] - loss: 9.269 - mae: 84.368 - mean_q: -111.591 Interval 6651 (3325000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1511 6 episodes - episode_reward: -177.227 [-257.691, -30.604] - loss: 8.842 - mae: 84.349 - mean_q: -111.606 Interval 6652 (3325500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1791 9 episodes - episode_reward: -164.090 [-272.141, -36.469] - loss: 7.959 - mae: 84.363 - mean_q: -111.608 Interval 6653 (3326000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4219 7 episodes - episode_reward: -199.378 [-298.750, -125.616] - loss: 8.360 - mae: 84.371 - mean_q: -111.626 Interval 6654 (3326500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9565 7 episodes - episode_reward: -135.528 [-201.477, 13.574] - loss: 7.855 - mae: 84.365 - mean_q: -111.614 Interval 6655 (3327000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2127 8 episodes - episode_reward: -205.073 [-393.833, -151.629] - loss: 12.031 - mae: 84.375 - mean_q: -111.613 Interval 6656 (3327500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1655 8 episodes - episode_reward: -200.609 [-249.017, -105.683] - loss: 11.092 - mae: 84.366 - mean_q: -111.587 Interval 6657 (3328000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7122 6 episodes - episode_reward: -212.756 [-277.985, -163.581] - loss: 11.634 - mae: 84.351 - mean_q: -111.587 Interval 6658 (3328500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7541 8 episodes - episode_reward: -178.665 [-263.151, -119.297] - loss: 9.675 - mae: 84.336 - mean_q: -111.575 Interval 6659 (3329000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0755 10 episodes - episode_reward: -151.912 [-216.111, -52.009] - loss: 12.495 - mae: 84.335 - mean_q: -111.570 Interval 6660 (3329500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6321 7 episodes - episode_reward: -179.256 [-225.280, -128.254] - loss: 10.577 - mae: 84.306 - mean_q: -111.570 Interval 6661 (3330000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5441 11 episodes - episode_reward: -162.700 [-275.755, -71.552] - loss: 10.382 - mae: 84.282 - mean_q: -111.546 Interval 6662 (3330500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6788 7 episodes - episode_reward: -197.181 [-341.545, -85.535] - loss: 7.984 - mae: 84.254 - mean_q: -111.550 Interval 6663 (3331000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9864 9 episodes - episode_reward: -166.348 [-194.628, -100.000] - loss: 6.917 - mae: 84.232 - mean_q: -111.552 Interval 6664 (3331500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8470 8 episodes - episode_reward: -179.710 [-239.644, -100.000] - loss: 11.835 - mae: 84.248 - mean_q: -111.558 Interval 6665 (3332000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6391 8 episodes - episode_reward: -168.260 [-270.299, -60.935] - loss: 7.443 - mae: 84.239 - mean_q: -111.555 Interval 6666 (3332500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3076 9 episodes - episode_reward: -163.790 [-226.114, -104.350] - loss: 7.700 - mae: 84.242 - mean_q: -111.598 Interval 6667 (3333000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4348 11 episodes - episode_reward: -170.301 [-303.141, -100.000] - loss: 7.609 - mae: 84.231 - mean_q: -111.604 Interval 6668 (3333500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7771 8 episodes - episode_reward: -170.637 [-220.971, -100.000] - loss: 11.969 - mae: 84.253 - mean_q: -111.610 Interval 6669 (3334000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9701 9 episodes - episode_reward: -170.875 [-256.276, -100.000] - loss: 9.257 - mae: 84.245 - mean_q: -111.605 Interval 6670 (3334500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6470 8 episodes - episode_reward: -156.041 [-268.696, -123.521] - loss: 7.512 - mae: 84.243 - mean_q: -111.625 Interval 6671 (3335000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3019 7 episodes - episode_reward: -370.103 [-1013.559, -126.275] - loss: 7.002 - mae: 84.245 - mean_q: -111.655 Interval 6672 (3335500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8879 8 episodes - episode_reward: -185.375 [-230.298, -81.511] - loss: 10.077 - mae: 84.294 - mean_q: -111.675 Interval 6673 (3336000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3572 9 episodes - episode_reward: -184.385 [-337.840, -100.000] - loss: 8.172 - mae: 84.311 - mean_q: -111.687 Interval 6674 (3336500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0766 7 episodes - episode_reward: -232.935 [-620.366, -120.488] - loss: 9.614 - mae: 84.350 - mean_q: -111.702 Interval 6675 (3337000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8129 8 episodes - episode_reward: -173.449 [-281.462, -100.000] - loss: 8.279 - mae: 84.383 - mean_q: -111.703 Interval 6676 (3337500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1422 8 episodes - episode_reward: -199.386 [-287.749, -141.064] - loss: 8.971 - mae: 84.411 - mean_q: -111.725 Interval 6677 (3338000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.3355 6 episodes - episode_reward: -184.755 [-213.357, -141.362] - loss: 9.036 - mae: 84.436 - mean_q: -111.747 Interval 6678 (3338500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9044 8 episodes - episode_reward: -181.308 [-344.046, -109.212] - loss: 9.939 - mae: 84.427 - mean_q: -111.773 Interval 6679 (3339000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8119 8 episodes - episode_reward: -183.907 [-224.866, -128.391] - loss: 7.620 - mae: 84.454 - mean_q: -111.773 Interval 6680 (3339500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5708 7 episodes - episode_reward: -175.913 [-238.065, -116.193] - loss: 9.975 - mae: 84.474 - mean_q: -111.792 Interval 6681 (3340000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6683 6 episodes - episode_reward: -218.129 [-375.867, -164.861] - loss: 9.495 - mae: 84.491 - mean_q: -111.802 Interval 6682 (3340500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4180 10 episodes - episode_reward: -171.115 [-233.993, -115.833] - loss: 9.132 - mae: 84.479 - mean_q: -111.800 Interval 6683 (3341000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0255 9 episodes - episode_reward: -175.649 [-220.109, -138.097] - loss: 9.244 - mae: 84.473 - mean_q: -111.800 Interval 6684 (3341500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9607 9 episodes - episode_reward: -161.092 [-204.788, -109.545] - loss: 9.506 - mae: 84.445 - mean_q: -111.790 Interval 6685 (3342000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3901 8 episodes - episode_reward: -150.955 [-255.507, -30.975] - loss: 7.668 - mae: 84.406 - mean_q: -111.790 Interval 6686 (3342500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -4.7553 5 episodes - episode_reward: -457.734 [-722.167, -100.000] - loss: 9.868 - mae: 84.384 - mean_q: -111.718 Interval 6687 (3343000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.3749 4 episodes - episode_reward: -564.663 [-1531.921, -89.813] - loss: 8.783 - mae: 84.384 - mean_q: -111.725 Interval 6688 (3343500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5110 8 episodes - episode_reward: -157.172 [-251.350, -15.313] - loss: 8.547 - mae: 84.414 - mean_q: -111.750 Interval 6689 (3344000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2306 7 episodes - episode_reward: -149.235 [-228.447, 40.400] - loss: 11.686 - mae: 84.438 - mean_q: -111.756 Interval 6690 (3344500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6253 7 episodes - episode_reward: -193.366 [-289.178, -141.700] - loss: 9.882 - mae: 84.432 - mean_q: -111.753 Interval 6691 (3345000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0587 8 episodes - episode_reward: -192.956 [-248.879, -141.313] - loss: 7.990 - mae: 84.442 - mean_q: -111.754 Interval 6692 (3345500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7457 8 episodes - episode_reward: -183.599 [-241.850, -158.935] - loss: 10.048 - mae: 84.475 - mean_q: -111.756 Interval 6693 (3346000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9857 9 episodes - episode_reward: -155.407 [-249.882, 8.690] - loss: 7.381 - mae: 84.479 - mean_q: -111.760 Interval 6694 (3346500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9039 9 episodes - episode_reward: -165.872 [-249.641, -52.369] - loss: 11.525 - mae: 84.499 - mean_q: -111.773 Interval 6695 (3347000 steps performed) 500/500 [==============================] 
- 2s 4ms/step - reward: -2.8131 7 episodes - episode_reward: -200.885 [-305.022, -143.835] - loss: 11.010 - mae: 84.508 - mean_q: -111.748 Interval 6696 (3347500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2631 9 episodes - episode_reward: -181.289 [-245.034, -100.000] - loss: 9.722 - mae: 84.530 - mean_q: -111.721 Interval 6697 (3348000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6343 8 episodes - episode_reward: -160.299 [-246.305, -100.000] - loss: 12.184 - mae: 84.521 - mean_q: -111.708 Interval 6698 (3348500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8357 8 episodes - episode_reward: -179.049 [-251.527, -51.494] - loss: 10.781 - mae: 84.524 - mean_q: -111.688 Interval 6699 (3349000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3175 8 episodes - episode_reward: -148.253 [-218.005, -42.405] - loss: 8.995 - mae: 84.509 - mean_q: -111.692 Interval 6700 (3349500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9376 8 episodes - episode_reward: -182.193 [-239.562, -131.421] - loss: 11.390 - mae: 84.534 - mean_q: -111.698 Interval 6701 (3350000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6331 7 episodes - episode_reward: -192.117 [-227.065, -152.537] - loss: 14.703 - mae: 84.533 - mean_q: -111.679 Interval 6702 (3350500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3316 7 episodes - episode_reward: -165.192 [-316.136, 8.904] - loss: 10.538 - mae: 84.497 - mean_q: -111.651 Interval 6703 (3351000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7199 7 episodes - episode_reward: -184.144 [-287.443, -46.867] - loss: 10.001 - mae: 84.484 - mean_q: -111.671 Interval 6704 (3351500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: 
-3.0864 10 episodes - episode_reward: -161.340 [-199.706, -100.000] - loss: 11.375 - mae: 84.469 - mean_q: -111.635 Interval 6705 (3352000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5563 7 episodes - episode_reward: -177.147 [-249.671, -136.111] - loss: 13.138 - mae: 84.470 - mean_q: -111.625 Interval 6706 (3352500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8772 9 episodes - episode_reward: -164.112 [-211.723, -117.518] - loss: 7.997 - mae: 84.437 - mean_q: -111.631 Interval 6707 (3353000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5716 8 episodes - episode_reward: -160.988 [-210.032, -99.994] - loss: 9.549 - mae: 84.444 - mean_q: -111.628 Interval 6708 (3353500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6425 7 episodes - episode_reward: -189.358 [-228.009, -166.673] - loss: 11.790 - mae: 84.431 - mean_q: -111.617 Interval 6709 (3354000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3492 6 episodes - episode_reward: -183.181 [-257.375, -114.005] - loss: 10.396 - mae: 84.419 - mean_q: -111.613 Interval 6710 (3354500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2852 9 episodes - episode_reward: -187.904 [-269.840, -128.342] - loss: 10.319 - mae: 84.431 - mean_q: -111.614 Interval 6711 (3355000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4848 10 episodes - episode_reward: -177.331 [-277.034, -100.000] - loss: 7.351 - mae: 84.408 - mean_q: -111.624 Interval 6712 (3355500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3689 8 episodes - episode_reward: -208.822 [-302.001, -100.000] - loss: 9.963 - mae: 84.419 - mean_q: -111.626 Interval 6713 (3356000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5768 7 episodes - 
episode_reward: -183.073 [-203.942, -144.060] - loss: 7.507 - mae: 84.398 - mean_q: -111.623 Interval 6714 (3356500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4592 6 episodes - episode_reward: -197.929 [-248.884, -157.132] - loss: 10.164 - mae: 84.414 - mean_q: -111.645 Interval 6715 (3357000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7190 7 episodes - episode_reward: -186.881 [-227.668, -137.974] - loss: 8.577 - mae: 84.420 - mean_q: -111.624 Interval 6716 (3357500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3729 7 episodes - episode_reward: -180.836 [-244.018, 7.946] - loss: 11.764 - mae: 84.434 - mean_q: -111.620 Interval 6717 (3358000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0489 8 episodes - episode_reward: -191.650 [-375.287, -116.371] - loss: 9.706 - mae: 84.415 - mean_q: -111.607 Interval 6718 (3358500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5525 7 episodes - episode_reward: -184.553 [-210.776, -159.072] - loss: 11.234 - mae: 84.428 - mean_q: -111.602 Interval 6719 (3359000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9592 9 episodes - episode_reward: -162.697 [-215.461, -100.000] - loss: 10.463 - mae: 84.405 - mean_q: -111.593 Interval 6720 (3359500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7671 9 episodes - episode_reward: -144.761 [-195.156, -0.051] - loss: 8.858 - mae: 84.393 - mean_q: -111.599 Interval 6721 (3360000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3479 9 episodes - episode_reward: -186.561 [-327.760, -100.000] - loss: 9.817 - mae: 84.405 - mean_q: -111.603 Interval 6722 (3360500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7824 7 episodes - episode_reward: -205.142 
[-261.529, -151.878] - loss: 8.723 - mae: 84.386 - mean_q: -111.614 Interval 6723 (3361000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8467 8 episodes - episode_reward: -167.445 [-217.599, -124.708] - loss: 11.299 - mae: 84.401 - mean_q: -111.626 Interval 6724 (3361500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5185 9 episodes - episode_reward: -210.000 [-284.995, -168.000] - loss: 9.382 - mae: 84.398 - mean_q: -111.617 Interval 6725 (3362000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8372 8 episodes - episode_reward: -177.469 [-213.527, -128.659] - loss: 9.082 - mae: 84.411 - mean_q: -111.630 Interval 6726 (3362500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6101 6 episodes - episode_reward: -198.409 [-284.733, -141.018] - loss: 10.147 - mae: 84.415 - mean_q: -111.636 Interval 6727 (3363000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8690 8 episodes - episode_reward: -183.393 [-226.073, -126.404] - loss: 9.914 - mae: 84.430 - mean_q: -111.617 Interval 6728 (3363500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4086 7 episodes - episode_reward: -239.569 [-330.476, -173.483] - loss: 9.227 - mae: 84.411 - mean_q: -111.607 Interval 6729 (3364000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1220 9 episodes - episode_reward: -182.511 [-242.045, -130.139] - loss: 9.083 - mae: 84.432 - mean_q: -111.626 Interval 6730 (3364500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9098 7 episodes - episode_reward: -210.387 [-370.363, -152.537] - loss: 9.716 - mae: 84.424 - mean_q: -111.670 Interval 6731 (3365000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8362 7 episodes - episode_reward: -186.643 [-249.169, -144.458] - 
loss: 12.270 - mae: 84.427 - mean_q: -111.662 Interval 6732 (3365500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2022 8 episodes - episode_reward: -211.120 [-289.125, -99.071] - loss: 7.018 - mae: 84.420 - mean_q: -111.680 Interval 6733 (3366000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1981 7 episodes - episode_reward: -157.607 [-194.577, -119.660] - loss: 14.899 - mae: 84.470 - mean_q: -111.667 Interval 6734 (3366500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8536 8 episodes - episode_reward: -182.014 [-275.882, -124.643] - loss: 9.167 - mae: 84.426 - mean_q: -111.661 Interval 6735 (3367000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6451 7 episodes - episode_reward: -182.862 [-378.803, -17.299] - loss: 8.405 - mae: 84.420 - mean_q: -111.673 Interval 6736 (3367500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2519 6 episodes - episode_reward: -185.170 [-222.199, -147.558] - loss: 8.308 - mae: 84.428 - mean_q: -111.701 Interval 6737 (3368000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7637 8 episodes - episode_reward: -175.625 [-254.066, -85.829] - loss: 9.895 - mae: 84.433 - mean_q: -111.706 Interval 6738 (3368500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5796 8 episodes - episode_reward: -221.042 [-353.640, -144.992] - loss: 10.368 - mae: 84.437 - mean_q: -111.721 Interval 6739 (3369000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7105 8 episodes - episode_reward: -296.957 [-1214.259, -100.000] - loss: 9.624 - mae: 84.430 - mean_q: -111.745 Interval 6740 (3369500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5372 9 episodes - episode_reward: -252.735 [-539.676, -167.544] - loss: 9.085 - mae: 84.453 - 
mean_q: -111.777 Interval 6741 (3370000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9352 8 episodes - episode_reward: -180.401 [-231.896, -150.945] - loss: 10.697 - mae: 84.478 - mean_q: -111.794 Interval 6742 (3370500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2134 7 episodes - episode_reward: -150.970 [-262.752, 24.656] - loss: 10.699 - mae: 84.518 - mean_q: -111.779 Interval 6743 (3371000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9816 8 episodes - episode_reward: -187.850 [-302.009, -100.000] - loss: 10.428 - mae: 84.520 - mean_q: -111.808 Interval 6744 (3371500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0249 10 episodes - episode_reward: -162.404 [-221.555, -111.720] - loss: 9.775 - mae: 84.533 - mean_q: -111.820 Interval 6745 (3372000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7081 7 episodes - episode_reward: -192.937 [-226.436, -143.954] - loss: 10.852 - mae: 84.557 - mean_q: -111.840 Interval 6746 (3372500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2848 5 episodes - episode_reward: -210.535 [-366.321, -134.068] - loss: 9.819 - mae: 84.584 - mean_q: -111.832 Interval 6747 (3373000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9838 7 episodes - episode_reward: -142.081 [-289.177, 30.055] - loss: 12.063 - mae: 84.600 - mean_q: -111.846 Interval 6748 (3373500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2777 9 episodes - episode_reward: -190.102 [-333.782, -100.000] - loss: 10.738 - mae: 84.612 - mean_q: -111.850 Interval 6749 (3374000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8411 8 episodes - episode_reward: -176.726 [-280.655, -120.038] - loss: 10.085 - mae: 84.616 - mean_q: -111.881 
Interval 6750 (3374500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9905 8 episodes - episode_reward: -180.403 [-299.251, -140.571] - loss: 10.049 - mae: 84.637 - mean_q: -111.854 Interval 6751 (3375000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8373 8 episodes - episode_reward: -179.564 [-259.229, -135.040] - loss: 8.447 - mae: 84.634 - mean_q: -111.861 Interval 6752 (3375500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0870 8 episodes - episode_reward: -182.118 [-237.535, -106.414] - loss: 10.428 - mae: 84.662 - mean_q: -111.878 Interval 6753 (3376000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5857 11 episodes - episode_reward: -175.528 [-271.047, -131.910] - loss: 12.765 - mae: 84.676 - mean_q: -111.860 Interval 6754 (3376500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8159 9 episodes - episode_reward: -154.990 [-201.200, -117.914] - loss: 8.749 - mae: 84.688 - mean_q: -111.875 Interval 6755 (3377000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4020 10 episodes - episode_reward: -169.741 [-224.876, -124.590] - loss: 11.042 - mae: 84.706 - mean_q: -111.880 Interval 6756 (3377500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7876 8 episodes - episode_reward: -178.039 [-275.129, -135.684] - loss: 9.122 - mae: 84.705 - mean_q: -111.853 Interval 6757 (3378000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2007 7 episodes - episode_reward: -153.467 [-247.405, 19.348] - loss: 10.171 - mae: 84.712 - mean_q: -111.895 Interval 6758 (3378500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5043 6 episodes - episode_reward: -201.350 [-280.547, -109.196] - loss: 9.910 - mae: 84.739 - mean_q: -111.902 Interval 6759 (3379000 
steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6889 7 episodes - episode_reward: -180.420 [-237.560, -86.774] - loss: 9.364 - mae: 84.741 - mean_q: -111.927 Interval 6760 (3379500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2184 7 episodes - episode_reward: -174.938 [-285.810, -110.495] - loss: 8.135 - mae: 84.743 - mean_q: -111.944 Interval 6761 (3380000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3556 10 episodes - episode_reward: -168.551 [-290.498, -114.866] - loss: 11.375 - mae: 84.784 - mean_q: -111.966 Interval 6762 (3380500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8939 6 episodes - episode_reward: -146.941 [-289.422, 4.670] - loss: 13.441 - mae: 84.785 - mean_q: -111.956 Interval 6763 (3381000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5584 8 episodes - episode_reward: -160.657 [-195.506, -110.874] - loss: 9.652 - mae: 84.773 - mean_q: -111.948 Interval 6764 (3381500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1710 8 episodes - episode_reward: -203.150 [-309.686, -104.939] - loss: 8.883 - mae: 84.780 - mean_q: -111.940 Interval 6765 (3382000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0845 7 episodes - episode_reward: -222.252 [-347.249, -162.726] - loss: 9.323 - mae: 84.773 - mean_q: -111.962 Interval 6766 (3382500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5048 7 episodes - episode_reward: -173.759 [-367.634, -100.000] - loss: 9.497 - mae: 84.788 - mean_q: -111.975 Interval 6767 (3383000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9224 10 episodes - episode_reward: -149.582 [-224.691, -58.517] - loss: 12.836 - mae: 84.803 - mean_q: -111.953 Interval 6768 (3383500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7953 6 episodes - episode_reward: -214.620 [-279.837, -177.623] - loss: 10.530 - mae: 84.799 - mean_q: -111.963 Interval 6769 (3384000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1663 10 episodes - episode_reward: -170.158 [-254.736, -100.000] - loss: 12.717 - mae: 84.804 - mean_q: -111.975 Interval 6770 (3384500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9512 7 episodes - episode_reward: -202.974 [-296.344, -137.458] - loss: 11.373 - mae: 84.799 - mean_q: -111.974 Interval 6771 (3385000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1655 8 episodes - episode_reward: -195.724 [-266.528, -94.011] - loss: 9.129 - mae: 84.776 - mean_q: -111.993 Interval 6772 (3385500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0510 8 episodes - episode_reward: -191.894 [-296.290, -97.620] - loss: 7.999 - mae: 84.758 - mean_q: -112.007 Interval 6773 (3386000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6158 10 episodes - episode_reward: -182.250 [-256.038, -121.305] - loss: 8.717 - mae: 84.757 - mean_q: -112.067 Interval 6774 (3386500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6600 7 episodes - episode_reward: -190.875 [-299.475, -121.038] - loss: 8.378 - mae: 84.734 - mean_q: -112.120 Interval 6775 (3387000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2851 8 episodes - episode_reward: -204.064 [-271.433, -166.378] - loss: 12.530 - mae: 84.755 - mean_q: -112.132 Interval 6776 (3387500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5757 10 episodes - episode_reward: -179.581 [-315.245, -126.235] - loss: 10.850 - mae: 84.748 - mean_q: -112.120 Interval 6777 (3388000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0676 8 episodes - episode_reward: -189.500 [-265.086, -139.639] - loss: 10.965 - mae: 84.759 - mean_q: -112.136 Interval 6778 (3388500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0807 7 episodes - episode_reward: -148.922 [-199.078, 12.339] - loss: 12.221 - mae: 84.775 - mean_q: -112.127 Interval 6779 (3389000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6370 7 episodes - episode_reward: -188.296 [-332.171, -112.594] - loss: 14.051 - mae: 84.780 - mean_q: -112.141 Interval 6780 (3389500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2447 7 episodes - episode_reward: -228.516 [-310.833, -174.495] - loss: 7.915 - mae: 84.780 - mean_q: -112.165 Interval 6781 (3390000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0380 9 episodes - episode_reward: -175.655 [-266.848, -127.292] - loss: 11.636 - mae: 84.804 - mean_q: -112.173 Interval 6782 (3390500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3510 7 episodes - episode_reward: -163.275 [-251.136, -116.196] - loss: 12.838 - mae: 84.820 - mean_q: -112.178 Interval 6783 (3391000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2761 8 episodes - episode_reward: -207.122 [-241.099, -148.698] - loss: 13.773 - mae: 84.816 - mean_q: -112.164 Interval 6784 (3391500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6325 6 episodes - episode_reward: -195.280 [-363.641, -129.606] - loss: 13.682 - mae: 84.821 - mean_q: -112.153 Interval 6785 (3392000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1690 11 episodes - episode_reward: -158.098 [-330.243, 8.621] - loss: 11.417 - mae: 84.807 - mean_q: -112.155 Interval 6786 (3392500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -4.1896 8 episodes - episode_reward: -234.520 [-544.463, -161.297] - loss: 9.679 - mae: 84.766 - mean_q: -112.156 Interval 6787 (3393000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.1849 9 episodes - episode_reward: -358.727 [-904.055, -113.300] - loss: 12.778 - mae: 84.747 - mean_q: -112.175 Interval 6788 (3393500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2600 6 episodes - episode_reward: -414.075 [-696.395, -100.000] - loss: 12.769 - mae: 84.741 - mean_q: -112.203 Interval 6789 (3394000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.9050 7 episodes - episode_reward: -385.252 [-567.166, -157.635] - loss: 12.747 - mae: 84.781 - mean_q: -112.199 Interval 6790 (3394500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4220 4 episodes - episode_reward: -394.064 [-1002.234, -167.300] - loss: 11.301 - mae: 84.811 - mean_q: -112.233 Interval 6791 (3395000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6771 7 episodes - episode_reward: -204.198 [-293.977, -128.629] - loss: 11.665 - mae: 84.841 - mean_q: -112.261 Interval 6792 (3395500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9519 7 episodes - episode_reward: -212.116 [-304.844, -146.595] - loss: 12.088 - mae: 84.878 - mean_q: -112.294 Interval 6793 (3396000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8607 9 episodes - episode_reward: -163.832 [-211.031, -100.000] - loss: 12.489 - mae: 84.914 - mean_q: -112.297 Interval 6794 (3396500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1230 8 episodes - episode_reward: -190.068 [-260.281, -113.487] - loss: 11.527 - mae: 84.941 - mean_q: -112.331 Interval 6795 (3397000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6247 8 episodes - episode_reward: -160.303 [-234.709, -112.289] - loss: 11.286 - mae: 84.974 - mean_q: -112.334 Interval 6796 (3397500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9462 8 episodes - episode_reward: -183.321 [-226.987, -137.683] - loss: 8.358 - mae: 84.983 - mean_q: -112.379 Interval 6797 (3398000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9702 7 episodes - episode_reward: -205.068 [-237.156, -151.994] - loss: 11.352 - mae: 85.030 - mean_q: -112.427 Interval 6798 (3398500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0603 9 episodes - episode_reward: -184.084 [-295.476, -41.618] - loss: 11.314 - mae: 85.063 - mean_q: -112.453 Interval 6799 (3399000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3405 9 episodes - episode_reward: -187.296 [-286.627, -100.000] - loss: 15.056 - mae: 85.106 - mean_q: -112.456 Interval 6800 (3399500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9990 7 episodes - episode_reward: -130.100 [-185.103, 17.027] - loss: 9.992 - mae: 85.112 - mean_q: -112.477 Interval 6801 (3400000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6043 7 episodes - episode_reward: -182.623 [-256.605, -100.000] - loss: 12.732 - mae: 85.141 - mean_q: -112.508 Interval 6802 (3400500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9714 7 episodes - episode_reward: -220.982 [-421.128, -143.595] - loss: 11.630 - mae: 85.164 - mean_q: -112.555 Interval 6803 (3401000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7460 7 episodes - episode_reward: -198.607 [-251.613, -135.460] - loss: 14.635 - mae: 85.202 - mean_q: -112.566 Interval 6804 (3401500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0424 8 episodes - episode_reward: -190.125 [-250.095, -132.996] - loss: 12.011 - mae: 85.202 - mean_q: -112.572 Interval 6805 (3402000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1055 9 episodes - episode_reward: -173.380 [-223.170, -92.015] - loss: 10.260 - mae: 85.233 - mean_q: -112.610 Interval 6806 (3402500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4426 7 episodes - episode_reward: -175.576 [-202.232, -140.388] - loss: 9.782 - mae: 85.261 - mean_q: -112.641 Interval 6807 (3403000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1235 8 episodes - episode_reward: -195.694 [-242.136, -152.575] - loss: 11.440 - mae: 85.310 - mean_q: -112.700 Interval 6808 (3403500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2259 5 episodes - episode_reward: -202.210 [-320.190, -146.821] - loss: 10.256 - mae: 85.342 - mean_q: -112.733 Interval 6809 (3404000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8108 8 episodes - episode_reward: -188.097 [-290.405, -122.985] - loss: 12.929 - mae: 85.386 - mean_q: -112.767 Interval 6810 (3404500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0438 8 episodes - episode_reward: -188.200 [-263.377, -125.032] - loss: 8.856 - mae: 85.391 - mean_q: -112.806 Interval 6811 (3405000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9058 8 episodes - episode_reward: -175.834 [-251.628, -126.642] - loss: 11.481 - mae: 85.427 - mean_q: -112.840 Interval 6812 (3405500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0567 9 episodes - episode_reward: -174.188 [-222.245, -110.391] - loss: 11.941 - mae: 85.459 - mean_q: -112.855 Interval 6813 (3406000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -2.6719 8 episodes - episode_reward: -166.911 [-204.861, -111.999] - loss: 8.348 - mae: 85.462 - mean_q: -112.886 Interval 6814 (3406500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4165 7 episodes - episode_reward: -169.285 [-220.534, -128.143] - loss: 15.086 - mae: 85.526 - mean_q: -112.905 Interval 6815 (3407000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7472 8 episodes - episode_reward: -170.930 [-268.679, -126.000] - loss: 10.640 - mae: 85.533 - mean_q: -112.924 Interval 6816 (3407500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6359 8 episodes - episode_reward: -171.096 [-213.962, -132.407] - loss: 10.027 - mae: 85.561 - mean_q: -112.944 Interval 6817 (3408000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7460 6 episodes - episode_reward: -214.591 [-318.707, -167.212] - loss: 10.783 - mae: 85.602 - mean_q: -112.967 Interval 6818 (3408500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9089 8 episodes - episode_reward: -182.765 [-278.544, -115.256] - loss: 9.558 - mae: 85.602 - mean_q: -112.986 Interval 6819 (3409000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8383 9 episodes - episode_reward: -164.294 [-219.492, -100.000] - loss: 10.190 - mae: 85.641 - mean_q: -113.039 Interval 6820 (3409500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3088 7 episodes - episode_reward: -164.665 [-253.857, -95.162] - loss: 11.505 - mae: 85.662 - mean_q: -113.079 Interval 6821 (3410000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1638 10 episodes - episode_reward: -160.082 [-216.447, -100.000] - loss: 12.235 - mae: 85.701 - mean_q: -113.084 Interval 6822 (3410500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7631 8 episodes - episode_reward: -171.546 [-249.862, -46.349] - loss: 10.957 - mae: 85.716 - mean_q: -113.084 Interval 6823 (3411000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8375 8 episodes - episode_reward: -174.841 [-201.792, -137.171] - loss: 11.811 - mae: 85.744 - mean_q: -113.111 Interval 6824 (3411500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3033 10 episodes - episode_reward: -164.670 [-222.109, -121.432] - loss: 12.905 - mae: 85.769 - mean_q: -113.137 Interval 6825 (3412000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2136 9 episodes - episode_reward: -171.954 [-249.652, -131.771] - loss: 12.084 - mae: 85.806 - mean_q: -113.138 Interval 6826 (3412500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8516 8 episodes - episode_reward: -190.739 [-279.257, -86.518] - loss: 10.912 - mae: 85.811 - mean_q: -113.136 Interval 6827 (3413000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8775 7 episodes - episode_reward: -190.283 [-229.849, -139.617] - loss: 12.973 - mae: 85.838 - mean_q: -113.129 Interval 6828 (3413500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5314 9 episodes - episode_reward: -195.833 [-243.171, -100.000] - loss: 12.618 - mae: 85.835 - mean_q: -113.122 Interval 6829 (3414000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5868 8 episodes - episode_reward: -179.387 [-243.007, -105.756] - loss: 10.672 - mae: 85.834 - mean_q: -113.132 Interval 6830 (3414500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2262 6 episodes - episode_reward: -169.257 [-225.672, -148.038] - loss: 11.392 - mae: 85.844 - mean_q: -113.152 Interval 6831 (3415000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3002 9 episodes - episode_reward: -193.000 [-259.601, -100.000] - loss: 11.887 - mae: 85.861 - mean_q: -113.137 Interval 6832 (3415500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0132 8 episodes - episode_reward: -180.282 [-288.391, -100.000] - loss: 10.289 - mae: 85.849 - mean_q: -113.166 Interval 6833 (3416000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6077 7 episodes - episode_reward: -175.529 [-227.920, -143.596] - loss: 10.192 - mae: 85.852 - mean_q: -113.147 Interval 6834 (3416500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5107 9 episodes - episode_reward: -200.625 [-309.859, -120.992] - loss: 11.401 - mae: 85.865 - mean_q: -113.152 Interval 6835 (3417000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5621 7 episodes - episode_reward: -189.294 [-283.667, -66.711] - loss: 8.598 - mae: 85.848 - mean_q: -113.150 Interval 6836 (3417500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5722 7 episodes - episode_reward: -193.462 [-253.354, -144.097] - loss: 9.699 - mae: 85.861 - mean_q: -113.166 Interval 6837 (3418000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5005 8 episodes - episode_reward: -146.359 [-201.486, 11.354] - loss: 13.121 - mae: 85.894 - mean_q: -113.152 Interval 6838 (3418500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1620 9 episodes - episode_reward: -179.289 [-312.989, 0.860] - loss: 11.387 - mae: 85.886 - mean_q: -113.156 Interval 6839 (3419000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4128 7 episodes - episode_reward: -165.346 [-243.954, -118.101] - loss: 14.306 - mae: 85.883 - mean_q: -113.138 Interval 6840 (3419500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.8369 8 episodes - episode_reward: -177.391 [-244.099, -107.926] - loss: 11.776 - mae: 85.851 - mean_q: -113.109 Interval 6841 (3420000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9458 9 episodes - episode_reward: -167.596 [-235.794, -116.315] - loss: 11.243 - mae: 85.837 - mean_q: -113.110 Interval 6842 (3420500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6044 7 episodes - episode_reward: -174.960 [-230.101, -100.000] - loss: 9.184 - mae: 85.807 - mean_q: -113.125 Interval 6843 (3421000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5289 10 episodes - episode_reward: -176.775 [-376.637, -100.000] - loss: 11.408 - mae: 85.801 - mean_q: -113.166 Interval 6844 (3421500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1790 8 episodes - episode_reward: -202.586 [-284.822, -100.000] - loss: 11.724 - mae: 85.800 - mean_q: -113.174 Interval 6845 (3422000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8769 8 episodes - episode_reward: -178.945 [-293.215, -15.418] - loss: 10.750 - mae: 85.784 - mean_q: -113.200 Interval 6846 (3422500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6346 8 episodes - episode_reward: -169.422 [-234.058, 10.917] - loss: 15.094 - mae: 85.802 - mean_q: -113.175 Interval 6847 (3423000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7076 7 episodes - episode_reward: -192.691 [-235.066, -100.000] - loss: 12.806 - mae: 85.783 - mean_q: -113.143 Interval 6848 (3423500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7426 8 episodes - episode_reward: -166.373 [-315.596, -50.770] - loss: 10.548 - mae: 85.779 - mean_q: -113.142 Interval 6849 (3424000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2731 7 episodes - episode_reward: -169.978 [-214.631, -100.633] - loss: 10.555 - mae: 85.772 - mean_q: -113.163 Interval 6850 (3424500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4203 8 episodes - episode_reward: -203.258 [-329.361, -130.860] - loss: 11.391 - mae: 85.786 - mean_q: -113.173 Interval 6851 (3425000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7578 6 episodes - episode_reward: -155.115 [-232.737, -114.164] - loss: 10.498 - mae: 85.779 - mean_q: -113.176 Interval 6852 (3425500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7691 8 episodes - episode_reward: -174.107 [-226.548, -138.057] - loss: 11.869 - mae: 85.772 - mean_q: -113.168 Interval 6853 (3426000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4502 7 episodes - episode_reward: -252.555 [-376.803, -144.541] - loss: 13.577 - mae: 85.770 - mean_q: -113.161 Interval 6854 (3426500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9457 7 episodes - episode_reward: -202.244 [-242.663, -122.752] - loss: 9.727 - mae: 85.752 - mean_q: -113.165 Interval 6855 (3427000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5982 7 episodes - episode_reward: -186.277 [-382.802, -28.964] - loss: 15.210 - mae: 85.781 - mean_q: -113.158 Interval 6856 (3427500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5548 8 episodes - episode_reward: -158.303 [-214.166, -82.817] - loss: 12.905 - mae: 85.752 - mean_q: -113.127 Interval 6857 (3428000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8013 8 episodes - episode_reward: -176.871 [-246.841, -139.173] - loss: 12.072 - mae: 85.719 - mean_q: -113.141 Interval 6858 (3428500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5381 7 episodes - episode_reward: -189.491 [-238.709, -141.512] - loss: 9.536 - mae: 85.710 - mean_q: -113.164 Interval 6859 (3429000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9182 6 episodes - episode_reward: -229.653 [-305.279, -178.837] - loss: 10.334 - mae: 85.727 - mean_q: -113.183 Interval 6860 (3429500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8600 9 episodes - episode_reward: -163.519 [-250.166, -26.623] - loss: 14.248 - mae: 85.740 - mean_q: -113.188 Interval 6861 (3430000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5305 7 episodes - episode_reward: -179.419 [-205.741, -135.215] - loss: 11.225 - mae: 85.720 - mean_q: -113.172 Interval 6862 (3430500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8321 7 episodes - episode_reward: -197.704 [-281.318, -125.780] - loss: 10.881 - mae: 85.724 - mean_q: -113.200 Interval 6863 (3431000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1099 7 episodes - episode_reward: -224.038 [-317.136, -148.383] - loss: 15.200 - mae: 85.731 - mean_q: -113.193 Interval 6864 (3431500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3331 8 episodes - episode_reward: -143.589 [-286.902, 4.967] - loss: 11.886 - mae: 85.725 - mean_q: -113.172 Interval 6865 (3432000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4260 9 episodes - episode_reward: -181.912 [-235.692, -100.000] - loss: 15.054 - mae: 85.747 - mean_q: -113.167 Interval 6866 (3432500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8689 8 episodes - episode_reward: -198.398 [-281.873, -140.990] - loss: 10.800 - mae: 85.711 - mean_q: -113.156 Interval 6867 (3433000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9155 8 episodes - episode_reward: -178.296 [-206.356, -145.017] - loss: 10.674 - mae: 85.718 - mean_q: -113.175 Interval 6868 (3433500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8446 7 episodes - episode_reward: -199.493 [-266.922, -163.839] - loss: 12.488 - mae: 85.738 - mean_q: -113.166 Interval 6869 (3434000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6527 7 episodes - episode_reward: -176.786 [-222.626, -136.198] - loss: 9.726 - mae: 85.703 - mean_q: -113.164 Interval 6870 (3434500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0599 8 episodes - episode_reward: -209.203 [-321.450, -92.070] - loss: 9.898 - mae: 85.710 - mean_q: -113.187 Interval 6871 (3435000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0921 8 episodes - episode_reward: -187.437 [-263.572, -100.000] - loss: 10.407 - mae: 85.706 - mean_q: -113.196 Interval 6872 (3435500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8699 8 episodes - episode_reward: -184.191 [-211.825, -145.039] - loss: 8.244 - mae: 85.706 - mean_q: -113.210 Interval 6873 (3436000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3572 7 episodes - episode_reward: -156.995 [-241.296, 6.919] - loss: 11.804 - mae: 85.730 - mean_q: -113.223 Interval 6874 (3436500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2761 6 episodes - episode_reward: -178.631 [-218.901, -123.679] - loss: 11.790 - mae: 85.734 - mean_q: -113.182 Interval 6875 (3437000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1859 10 episodes - episode_reward: -174.763 [-252.932, -63.340] - loss: 13.809 - mae: 85.749 - mean_q: -113.155 Interval 6876 (3437500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2797 8 episodes - episode_reward: -205.178 [-270.704, -129.641] - loss: 12.501 - mae: 85.740 - mean_q: -113.146 Interval 6877 (3438000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8271 8 episodes - episode_reward: -170.068 [-249.749, -100.111] - loss: 10.283 - mae: 85.726 - mean_q: -113.128 Interval 6878 (3438500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6431 8 episodes - episode_reward: -165.458 [-228.552, -100.000] - loss: 8.985 - mae: 85.708 - mean_q: -113.139 Interval 6879 (3439000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7557 7 episodes - episode_reward: -202.989 [-272.982, -137.010] - loss: 11.876 - mae: 85.707 - mean_q: -113.127 Interval 6880 (3439500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5834 7 episodes - episode_reward: -172.568 [-239.464, -104.567] - loss: 8.698 - mae: 85.687 - mean_q: -113.139 Interval 6881 (3440000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9243 8 episodes - episode_reward: -189.473 [-255.579, -142.946] - loss: 9.570 - mae: 85.694 - mean_q: -113.144 Interval 6882 (3440500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2046 8 episodes - episode_reward: -198.375 [-295.122, -126.041] - loss: 10.092 - mae: 85.694 - mean_q: -113.134 Interval 6883 (3441000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0840 7 episodes - episode_reward: -214.505 [-303.007, -121.656] - loss: 8.674 - mae: 85.691 - mean_q: -113.156 Interval 6884 (3441500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2739 8 episodes - episode_reward: -209.568 [-299.620, -100.000] - loss: 10.677 - mae: 85.702 - mean_q: -113.154 Interval 6885 (3442000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8705 8 episodes - episode_reward: -186.318 [-243.778, -118.787] - loss: 9.769 - mae: 85.702 - mean_q: -113.145 Interval 6886 (3442500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1038 8 episodes - episode_reward: -189.593 [-248.765, -137.325] - loss: 9.458 - mae: 85.681 - mean_q: -113.146 Interval 6887 (3443000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7203 8 episodes - episode_reward: -173.583 [-278.457, -107.144] - loss: 8.584 - mae: 85.637 - mean_q: -113.156 Interval 6888 (3443500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8344 7 episodes - episode_reward: -199.287 [-288.586, -135.968] - loss: 7.957 - mae: 85.599 - mean_q: -113.153 Interval 6889 (3444000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9443 7 episodes - episode_reward: -206.451 [-294.306, -138.189] - loss: 9.039 - mae: 85.546 - mean_q: -113.149 Interval 6890 (3444500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9137 8 episodes - episode_reward: -190.069 [-264.719, -125.983] - loss: 8.014 - mae: 85.481 - mean_q: -113.145 Interval 6891 (3445000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4798 7 episodes - episode_reward: -162.716 [-232.983, -21.136] - loss: 9.136 - mae: 85.428 - mean_q: -113.135 Interval 6892 (3445500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.2871 5 episodes - episode_reward: -541.211 [-1078.706, -199.282] - loss: 10.203 - mae: 85.374 - mean_q: -113.059 Interval 6893 (3446000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.5417 7 episodes - episode_reward: -365.540 [-578.228, -159.991] - loss: 7.916 - mae: 85.325 - mean_q: -113.009 Interval 6894 (3446500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -4.8867 5 episodes - episode_reward: -525.358 [-810.196, -308.522] - loss: 8.976 - mae: 85.326 - mean_q: -113.055 Interval 6895 (3447000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7301 8 episodes - episode_reward: -168.922 [-229.498, -111.220] - loss: 12.396 - mae: 85.360 - mean_q: -113.051 Interval 6896 (3447500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4168 7 episodes - episode_reward: -178.466 [-265.214, -110.271] - loss: 11.386 - mae: 85.340 - mean_q: -113.056 Interval 6897 (3448000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9762 9 episodes - episode_reward: -163.255 [-212.269, -86.650] - loss: 9.456 - mae: 85.342 - mean_q: -113.034 Interval 6898 (3448500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9328 9 episodes - episode_reward: -164.496 [-224.775, -87.560] - loss: 9.655 - mae: 85.340 - mean_q: -113.027 Interval 6899 (3449000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0738 10 episodes - episode_reward: -150.891 [-183.107, -100.000] - loss: 9.219 - mae: 85.338 - mean_q: -113.038 Interval 6900 (3449500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9782 8 episodes - episode_reward: -184.159 [-252.186, -135.231] - loss: 10.695 - mae: 85.361 - mean_q: -113.048 Interval 6901 (3450000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6920 7 episodes - episode_reward: -195.181 [-261.747, -136.578] - loss: 8.109 - mae: 85.363 - mean_q: -113.026 Interval 6902 (3450500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9386 8 episodes - episode_reward: -184.149 [-219.676, -133.314] - loss: 9.670 - mae: 85.376 - mean_q: -113.050 Interval 6903 (3451000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9907 9 episodes - episode_reward: -173.056 [-226.129, -120.189] - loss: 8.476 - mae: 85.383 - mean_q: -113.037 Interval 6904 (3451500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1882 6 episodes - episode_reward: -158.137 [-215.337, -3.341] - loss: 8.850 - mae: 85.377 - mean_q: -113.032 Interval 6905 (3452000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8181 7 episodes - episode_reward: -213.212 [-267.366, -154.816] - loss: 8.414 - mae: 85.377 - mean_q: -113.026 Interval 6906 (3452500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1954 8 episodes - episode_reward: -198.158 [-260.907, -119.011] - loss: 10.425 - mae: 85.374 - mean_q: -113.023 Interval 6907 (3453000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5807 8 episodes - episode_reward: -164.393 [-214.802, -100.000] - loss: 9.350 - mae: 85.369 - mean_q: -113.006 Interval 6908 (3453500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5149 9 episodes - episode_reward: -197.785 [-309.099, -112.537] - loss: 10.023 - mae: 85.374 - mean_q: -113.012 Interval 6909 (3454000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7803 7 episodes - episode_reward: -187.213 [-270.667, -61.865] - loss: 8.261 - mae: 85.368 - mean_q: -113.012 Interval 6910 (3454500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7816 10 episodes - episode_reward: -192.819 [-219.038, -100.000] - loss: 9.925 - mae: 85.392 - mean_q: -113.002 Interval 6911 (3455000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6679 8 episodes - episode_reward: -170.718 [-254.363, -100.000] - loss: 9.260 - mae: 85.395 - mean_q: -112.988 Interval 6912 (3455500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6063 7 episodes - episode_reward: -181.964 [-290.011, -114.913] - loss: 12.272 - mae: 85.415 - mean_q: -112.963 Interval 6913 (3456000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5442 7 episodes - episode_reward: -185.075 [-237.326, -159.133] - loss: 13.252 - mae: 85.420 - mean_q: -112.943 Interval 6914 (3456500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0921 8 episodes - episode_reward: -191.315 [-241.628, -152.323] - loss: 10.742 - mae: 85.394 - mean_q: -112.901 Interval 6915 (3457000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3650 6 episodes - episode_reward: -189.220 [-224.518, -149.745] - loss: 11.658 - mae: 85.382 - mean_q: -112.904 Interval 6916 (3457500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9155 9 episodes - episode_reward: -165.718 [-268.639, -56.880] - loss: 9.602 - mae: 85.375 - mean_q: -112.907 Interval 6917 (3458000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4633 8 episodes - episode_reward: -159.726 [-267.944, 8.911] - loss: 12.847 - mae: 85.380 - mean_q: -112.900 Interval 6918 (3458500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4675 7 episodes - episode_reward: -163.053 [-300.680, -33.667] - loss: 9.858 - mae: 85.364 - mean_q: -112.898 Interval 6919 (3459000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1180 9 episodes - episode_reward: -180.522 [-227.642, -112.938] - loss: 11.256 - mae: 85.376 - mean_q: -112.908 Interval 6920 (3459500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6156 6 episodes - episode_reward: -208.261 [-244.630, -173.592] - loss: 11.224 - mae: 85.382 - mean_q: -112.902 Interval 6921 (3460000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5530 7 episodes - episode_reward: -193.283 [-244.407, -144.448] - loss: 7.872 - mae: 85.366 - mean_q: -112.902 Interval 6922 (3460500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3030 10 episodes - episode_reward: -164.451 [-229.804, -97.294] - loss: 9.423 - mae: 85.378 - mean_q: -112.924 Interval 6923 (3461000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7508 7 episodes - episode_reward: -200.559 [-278.654, -157.291] - loss: 8.176 - mae: 85.378 - mean_q: -112.961 Interval 6924 (3461500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5759 9 episodes - episode_reward: -199.056 [-266.178, -138.543] - loss: 10.889 - mae: 85.397 - mean_q: -112.973 Interval 6925 (3462000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9041 7 episodes - episode_reward: -196.898 [-258.674, -158.167] - loss: 10.297 - mae: 85.396 - mean_q: -112.970 Interval 6926 (3462500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2167 8 episodes - episode_reward: -209.045 [-278.779, -170.303] - loss: 9.037 - mae: 85.407 - mean_q: -112.970 Interval 6927 (3463000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9398 9 episodes - episode_reward: -155.195 [-217.953, -100.000] - loss: 10.080 - mae: 85.433 - mean_q: -112.988 Interval 6928 (3463500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8067 9 episodes - episode_reward: -161.907 [-356.750, 18.979] - loss: 12.688 - mae: 85.436 - mean_q: -112.974 Interval 6929 (3464000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3013 6 episodes - episode_reward: -188.028 [-224.427, -154.689] - loss: 10.275 - mae: 85.436 - mean_q: -112.982 Interval 6930 (3464500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.4660 9 episodes - episode_reward: -198.535 [-235.541, -160.441] - loss: 10.485 - mae: 85.439 - mean_q: -113.000 Interval 6931 (3465000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5482 8 episodes - episode_reward: -161.813 [-263.655, 35.705] - loss: 11.574 - mae: 85.439 - mean_q: -113.008 Interval 6932 (3465500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4772 7 episodes - episode_reward: -176.596 [-351.748, -84.221] - loss: 11.293 - mae: 85.430 - mean_q: -113.013 Interval 6933 (3466000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5344 8 episodes - episode_reward: -152.416 [-205.886, -117.610] - loss: 13.280 - mae: 85.428 - mean_q: -113.009 Interval 6934 (3466500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0006 7 episodes - episode_reward: -215.866 [-274.908, -153.185] - loss: 13.035 - mae: 85.428 - mean_q: -113.005 Interval 6935 (3467000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5233 5 episodes - episode_reward: -229.190 [-373.074, -163.429] - loss: 11.884 - mae: 85.419 - mean_q: -112.994 Interval 6936 (3467500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0572 8 episodes - episode_reward: -208.223 [-390.794, -45.922] - loss: 14.699 - mae: 85.421 - mean_q: -112.988 Interval 6937 (3468000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9339 7 episodes - episode_reward: -192.219 [-254.690, -145.683] - loss: 10.550 - mae: 85.405 - mean_q: -112.994 Interval 6938 (3468500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7317 7 episodes - episode_reward: -133.646 [-209.324, 18.202] - loss: 11.178 - mae: 85.408 - mean_q: -113.018 Interval 6939 (3469000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3698 6 episodes - episode_reward: -199.101 [-294.869, -120.945] - loss: 13.589 - mae: 85.430 - mean_q: -113.006 Interval 6940 (3469500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0843 7 episodes - episode_reward: -279.743 [-858.986, -141.541] - loss: 10.777 - mae: 85.405 - mean_q: -113.012 Interval 6941 (3470000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9025 8 episodes - episode_reward: -191.773 [-273.104, -156.807] - loss: 11.879 - mae: 85.443 - mean_q: -113.017 Interval 6942 (3470500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1207 9 episodes - episode_reward: -174.246 [-258.482, -83.395] - loss: 10.463 - mae: 85.457 - mean_q: -113.035 Interval 6943 (3471000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3410 7 episodes - episode_reward: -171.760 [-220.721, -110.536] - loss: 12.353 - mae: 85.482 - mean_q: -113.013 Interval 6944 (3471500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5512 6 episodes - episode_reward: -213.032 [-376.994, -131.844] - loss: 11.233 - mae: 85.490 - mean_q: -113.011 Interval 6945 (3472000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7426 9 episodes - episode_reward: -148.941 [-245.523, 56.033] - loss: 8.781 - mae: 85.491 - mean_q: -113.014 Interval 6946 (3472500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1104 10 episodes - episode_reward: -158.272 [-202.157, -100.000] - loss: 11.878 - mae: 85.521 - mean_q: -113.024 Interval 6947 (3473000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5508 6 episodes - episode_reward: -190.429 [-283.411, -109.048] - loss: 10.611 - mae: 85.525 - mean_q: -113.037 Interval 6948 (3473500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5630 9 episodes - episode_reward: -153.201 [-241.124, -20.018] - loss: 11.229 - mae: 85.521 - mean_q: -113.054 Interval 6949 (3474000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9862 8 episodes - episode_reward: -190.296 [-262.235, -134.246] - loss: 12.481 - mae: 85.550 - mean_q: -113.038 Interval 6950 (3474500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2526 7 episodes - episode_reward: -158.018 [-199.344, -102.135] - loss: 11.837 - mae: 85.547 - mean_q: -113.015 Interval 6951 (3475000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9930 7 episodes - episode_reward: -133.782 [-211.019, -79.795] - loss: 9.840 - mae: 85.545 - mean_q: -113.024 Interval 6952 (3475500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9486 8 episodes - episode_reward: -187.450 [-241.294, -137.009] - loss: 11.917 - mae: 85.552 - mean_q: -113.012 Interval 6953 (3476000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2218 8 episodes - episode_reward: -201.243 [-297.592, -120.421] - loss: 11.942 - mae: 85.553 - mean_q: -112.996 Interval 6954 (3476500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6400 8 episodes - episode_reward: -166.494 [-206.115, -100.000] - loss: 10.226 - mae: 85.552 - mean_q: -112.983 Interval 6955 (3477000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6529 7 episodes - episode_reward: -194.525 [-246.413, -155.130] - loss: 9.722 - mae: 85.555 - mean_q: -112.987 Interval 6956 (3477500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1219 7 episodes - episode_reward: -145.233 [-218.753, -51.404] - loss: 13.693 - mae: 85.566 - mean_q: -112.949 Interval 6957 (3478000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.3950 8 episodes - episode_reward: -152.662 [-255.238, -8.467] - loss: 12.759 - mae: 85.536 - mean_q: -112.904 Interval 6958 (3478500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1696 7 episodes - episode_reward: -152.865 [-229.529, -82.530] - loss: 9.952 - mae: 85.509 - mean_q: -112.911 Interval 6959 (3479000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6892 8 episodes - episode_reward: -173.733 [-231.823, -136.153] - loss: 11.598 - mae: 85.513 - mean_q: -112.897 Interval 6960 (3479500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2973 9 episodes - episode_reward: -183.771 [-228.504, -130.298] - loss: 10.783 - mae: 85.495 - mean_q: -112.894 Interval 6961 (3480000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6272 7 episodes - episode_reward: -186.760 [-245.170, -146.920] - loss: 12.808 - mae: 85.500 - mean_q: -112.892 Interval 6962 (3480500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4645 7 episodes - episode_reward: -158.880 [-265.244, 4.600] - loss: 10.398 - mae: 85.477 - mean_q: -112.885 Interval 6963 (3481000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1779 8 episodes - episode_reward: -214.367 [-250.418, -182.966] - loss: 8.354 - mae: 85.470 - mean_q: -112.895 Interval 6964 (3481500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7423 8 episodes - episode_reward: -164.491 [-239.915, -100.000] - loss: 12.711 - mae: 85.494 - mean_q: -112.875 Interval 6965 (3482000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6346 7 episodes - episode_reward: -187.603 [-261.988, 6.756] - loss: 10.347 - mae: 85.459 - mean_q: -112.859 Interval 6966 (3482500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8924 9 episodes - episode_reward: -160.628 [-200.677, -104.837] - loss: 8.413 - mae: 85.456 - mean_q: -112.875 Interval 6967 (3483000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7782 10 episodes - episode_reward: -144.079 [-203.425, 6.912] - loss: 11.818 - mae: 85.477 - mean_q: -112.878 Interval 6968 (3483500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6550 7 episodes - episode_reward: -180.618 [-258.822, -111.916] - loss: 14.690 - mae: 85.462 - mean_q: -112.832 Interval 6969 (3484000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8103 7 episodes - episode_reward: -190.648 [-218.656, -156.179] - loss: 13.329 - mae: 85.461 - mean_q: -112.784 Interval 6970 (3484500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3631 10 episodes - episode_reward: -181.714 [-268.758, -133.574] - loss: 11.455 - mae: 85.438 - mean_q: -112.751 Interval 6971 (3485000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8260 6 episodes - episode_reward: -204.339 [-295.084, -154.743] - loss: 10.422 - mae: 85.428 - mean_q: -112.750 Interval 6972 (3485500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1474 8 episodes - episode_reward: -158.518 [-324.240, -54.487] - loss: 10.148 - mae: 85.435 - mean_q: -112.734 Interval 6973 (3486000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1928 12 episodes - episode_reward: -174.383 [-232.363, -100.000] - loss: 10.948 - mae: 85.434 - mean_q: -112.716 Interval 6974 (3486500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1307 9 episodes - episode_reward: -172.597 [-235.651, -81.638] - loss: 11.286 - mae: 85.430 - mean_q: -112.701 Interval 6975 (3487000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8012 7 episodes - episode_reward: -195.898 [-323.756, -141.070] - loss: 13.578 - mae: 85.433 - mean_q: -112.671 Interval 6976 (3487500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6807 8 episodes - episode_reward: -172.787 [-234.042, -100.000] - loss: 11.518 - mae: 85.400 - mean_q: -112.645 Interval 6977 (3488000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5864 8 episodes - episode_reward: -160.040 [-248.417, -4.254] - loss: 11.335 - mae: 85.375 - mean_q: -112.645 Interval 6978 (3488500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8822 8 episodes - episode_reward: -181.280 [-368.412, -98.004] - loss: 11.555 - mae: 85.367 - mean_q: -112.621 Interval 6979 (3489000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1242 9 episodes - episode_reward: -162.143 [-271.250, -100.000] - loss: 9.946 - mae: 85.364 - mean_q: -112.620 Interval 6980 (3489500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2577 9 episodes - episode_reward: -182.105 [-251.587, 4.333] - loss: 14.515 - mae: 85.387 - mean_q: -112.591 Interval 6981 (3490000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4656 10 episodes - episode_reward: -178.876 [-261.036, -100.000] - loss: 9.637 - mae: 85.355 - mean_q: -112.558 Interval 6982 (3490500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4012 10 episodes - episode_reward: -174.519 [-227.220, -120.339] - loss: 11.972 - mae: 85.352 - mean_q: -112.559 Interval 6983 (3491000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4221 8 episodes - episode_reward: -212.211 [-402.253, -101.681] - loss: 10.118 - mae: 85.327 - mean_q: -112.569 Interval 6984 (3491500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4182 6 episodes - episode_reward: -203.103 [-329.436, -130.444] - loss: 14.045 - mae: 85.328 - mean_q: -112.558 Interval 6985 (3492000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0960 8 episodes - episode_reward: -183.845 [-336.073, -95.242] - loss: 11.554 - mae: 85.314 - mean_q: -112.550 Interval 6986 (3492500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7551 7 episodes - episode_reward: -194.387 [-276.265, -160.475] - loss: 13.852 - mae: 85.306 - mean_q: -112.534 Interval 6987 (3493000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6006 7 episodes - episode_reward: -191.757 [-268.219, -14.858] - loss: 13.153 - mae: 85.302 - mean_q: -112.518 Interval 6988 (3493500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9782 9 episodes - episode_reward: -157.891 [-219.230, -103.417] - loss: 14.039 - mae: 85.291 - mean_q: -112.504 Interval 6989 (3494000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5208 8 episodes - episode_reward: -169.529 [-228.198, -62.717] - loss: 11.143 - mae: 85.251 - mean_q: -112.512 Interval 6990 (3494500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4625 6 episodes - episode_reward: -200.750 [-320.665, -62.666] - loss: 11.311 - mae: 85.242 - mean_q: -112.498 Interval 6991 (3495000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1392 7 episodes - episode_reward: -156.263 [-247.954, -80.737] - loss: 13.881 - mae: 85.226 - mean_q: -112.469 Interval 6992 (3495500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2515 6 episodes - episode_reward: -187.177 [-380.210, -125.075] - loss: 13.291 - mae: 85.182 - mean_q: -112.442 Interval 6993 (3496000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5363 8 episodes - episode_reward: -158.442 [-239.067, -106.928] - loss: 11.159 - mae: 85.139 - mean_q: -112.433 Interval 6994 (3496500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5924 10 episodes - episode_reward: -181.941 [-306.662, -147.228] - loss: 14.556 - mae: 85.094 - mean_q: -112.424 Interval 6995 (3497000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9708 7 episodes - episode_reward: -203.460 [-240.629, -167.754] - loss: 11.290 - mae: 85.040 - mean_q: -112.408 Interval 6996 (3497500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5741 7 episodes - episode_reward: -171.538 [-210.731, -123.539] - loss: 11.345 - mae: 84.995 - mean_q: -112.431 Interval 6997 (3498000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5809 8 episodes - episode_reward: -170.995 [-252.212, -31.144] - loss: 9.120 - mae: 84.955 - mean_q: -112.423 Interval 6998 (3498500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2236 8 episodes - episode_reward: -203.565 [-295.357, -117.915] - loss: 11.234 - mae: 84.926 - mean_q: -112.416 Interval 6999 (3499000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6066 6 episodes - episode_reward: -272.542 [-649.401, -138.038] - loss: 13.835 - mae: 84.900 - mean_q: -112.394 Interval 7000 (3499500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3945 8 episodes - episode_reward: -351.710 [-614.841, -128.607] - loss: 14.105 - mae: 84.865 - mean_q: -112.304 Interval 7001 (3500000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2179 7 episodes - episode_reward: -384.400 [-576.152, -289.729] - loss: 10.389 - mae: 84.868 - mean_q: -112.298 Interval 7002 (3500500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9797 7 episodes - episode_reward: -214.797 [-429.807, -128.246] - loss: 16.615 - mae: 84.887 - mean_q: -112.300 Interval 7003 (3501000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4037 7 episodes - episode_reward: -161.958 [-312.646, -7.184] - loss: 14.896 - mae: 84.893 - mean_q: -112.284 Interval 7004 (3501500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0638 8 episodes - episode_reward: -203.187 [-302.586, -114.435] - loss: 13.861 - mae: 84.898 - mean_q: -112.249 Interval 7005 (3502000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4271 8 episodes - episode_reward: -148.286 [-187.314, -100.332] - loss: 14.229 - mae: 84.911 - mean_q: -112.224 Interval 7006 (3502500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3972 7 episodes - episode_reward: -166.270 [-223.000, -125.520] - loss: 16.459 - mae: 84.932 - mean_q: -112.197 Interval 7007 (3503000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2341 8 episodes - episode_reward: -202.764 [-393.914, -148.224] - loss: 14.630 - mae: 84.922 - mean_q: -112.163 Interval 7008 (3503500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1347 8 episodes - episode_reward: -138.408 [-212.385, -0.641] - loss: 15.066 - mae: 84.920 - mean_q: -112.121 Interval 7009 (3504000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0127 8 episodes - episode_reward: -189.957 [-304.102, -126.997] - loss: 15.556 - mae: 84.916 - mean_q: -112.090 Interval 7010 (3504500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5945 7 episodes - episode_reward: -177.410 [-222.954, -103.128] - loss: 13.396 - mae: 84.894 - mean_q: -112.062 Interval 7011 (3505000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4973 8 episodes - episode_reward: -179.418 [-219.652, -126.555] - loss: 14.170 - mae: 84.879 - mean_q: -112.045 Interval 7012 (3505500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6280 7 episodes - episode_reward: -161.349 [-229.075, 32.993] - loss: 14.873 - mae: 84.861 - mean_q: -112.015 Interval 7013 (3506000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4807 7 episodes - episode_reward: -185.994 [-268.898, -128.938] - loss: 10.940 - mae: 84.826 - mean_q: -111.998 Interval 7014 (3506500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7623 7 episodes - episode_reward: -189.966 [-260.301, -145.298] - loss: 15.862 - mae: 84.829 - mean_q: -111.966 Interval 7015 (3507000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4660 8 episodes - episode_reward: -159.194 [-214.990, -100.000] - loss: 8.448 - mae: 84.759 - mean_q: -111.982 Interval 7016 (3507500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8093 7 episodes - episode_reward: -199.290 [-300.239, -93.988] - loss: 12.990 - mae: 84.760 - mean_q: -111.984 Interval 7017 (3508000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9560 7 episodes - episode_reward: -198.020 [-251.313, -106.091] - loss: 15.576 - mae: 84.758 - mean_q: -111.952 Interval 7018 (3508500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2251 8 episodes - episode_reward: -211.255 [-259.773, -141.163] - loss: 12.472 - mae: 84.737 - mean_q: -111.937 Interval 7019 (3509000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2974 8 episodes - episode_reward: -203.589 [-335.223, -100.000] - loss: 14.307 - mae: 84.721 - mean_q: -111.910 Interval 7020 (3509500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1743 9 episodes - episode_reward: -178.678 [-250.236, -134.429] - loss: 12.250 - mae: 84.699 - mean_q: -111.906 Interval 7021 (3510000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5963 10 episodes - episode_reward: -173.992 [-252.359, -100.000] - loss: 13.755 - mae: 84.693 - mean_q: -111.879 Interval 7022 (3510500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8478 7 episodes - episode_reward: -213.698 [-339.371, -141.760] - loss: 14.264 - mae: 84.671 - mean_q: -111.876 Interval 7023 (3511000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2065 9 episodes - episode_reward: -177.288 [-231.360, -100.000] - loss: 13.607 - mae: 84.647 - mean_q: -111.857 Interval 7024 (3511500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2839 9 episodes - episode_reward: -182.961 [-235.404, -138.650] - loss: 9.181 - mae: 84.618 - mean_q: -111.856 Interval 7025 (3512000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8225 7 episodes - episode_reward: -197.959 [-260.359, -156.241] - loss: 13.866 - mae: 84.633 - mean_q: -111.860 Interval 7026 (3512500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1978 10 episodes - episode_reward: -163.632 [-241.375, -100.000] - loss: 11.783 - mae: 84.619 - mean_q: -111.857 Interval 7027 (3513000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9877 7 episodes - episode_reward: -201.122 [-265.846, -157.600] - loss: 16.098 - mae: 84.630 - mean_q: -111.839 Interval 7028 (3513500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5791 10 episodes - episode_reward: -178.002 [-264.784, -100.000] - loss: 12.848 - mae: 84.598 - mean_q: -111.814 Interval 7029 (3514000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9941 9 episodes - episode_reward: -176.586 [-222.024, -107.190] - loss: 16.651 - mae: 84.608 - mean_q: -111.786 Interval 7030 (3514500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0887 9 episodes - episode_reward: -168.083 [-275.416, 30.928] - loss: 10.859 - mae: 84.553 - mean_q: -111.769 Interval 7031 (3515000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8939 7 episodes - episode_reward: -212.807 [-315.326, -156.857] - loss: 15.767 - mae: 84.551 - mean_q: -111.769 Interval 7032 (3515500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5930 8 episodes - episode_reward: -151.930 [-193.047, -90.898] - loss: 11.055 - mae: 84.528 - mean_q: -111.757 Interval 7033 (3516000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8359 7 episodes - episode_reward: -133.046 [-190.290, -37.099] - loss: 13.409 - mae: 84.520 - mean_q: -111.756 Interval 7034 (3516500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2521 9 episodes - episode_reward: -186.892 [-300.806, -100.000] - loss: 11.688 - mae: 84.511 - mean_q: -111.738 Interval 7035 (3517000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4728 8 episodes - episode_reward: -214.314 [-276.280, -158.784] - loss: 9.659 - mae: 84.496 - mean_q: -111.732 Interval 7036 (3517500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0424 8 episodes - episode_reward: -191.992 [-253.907, -100.000] - loss: 12.572 - mae: 84.502 - mean_q: -111.734 Interval 7037 (3518000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6639 7 episodes - episode_reward: -189.096 [-254.502, -100.000] - loss: 12.072 - mae: 84.488 - mean_q: -111.734 Interval 7038 (3518500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5571 7 episodes - episode_reward: -183.237 [-334.749, -90.772] - loss: 13.140 - mae: 84.482 - mean_q: -111.741 Interval 7039 (3519000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8858 7 episodes - episode_reward: -199.313 [-266.922, -100.000] - loss: 11.785 - mae: 84.472 - mean_q: -111.732 Interval 7040 (3519500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7557 7 episodes - episode_reward: -200.005 [-312.668, -136.437] - loss: 11.009 - mae: 84.459 - mean_q: -111.742 Interval 7041 (3520000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8138 8 episodes - episode_reward: -179.434 [-255.437, -132.266] - loss: 12.445 - mae: 84.458 - mean_q: -111.733 Interval 7042 (3520500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.5426 8 episodes - episode_reward: -327.819 [-672.698, -151.006] - loss: 10.349 - mae: 84.436 - mean_q: -111.734 Interval 7043 (3521000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0598 7 episodes - episode_reward: -228.226 [-392.044, -90.516] - loss: 13.200 - mae: 84.454 - mean_q: -111.736 Interval 7044 (3521500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1337 7 episodes - episode_reward: -150.219 [-259.129, -1.820] - loss: 10.430 - mae: 84.484 - mean_q: -111.724 Interval 7045 (3522000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6642 8 episodes - episode_reward: -174.776 [-277.740, -70.258] - loss: 10.164 - mae: 84.491 - mean_q: -111.742 Interval 7046 (3522500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1255 9 episodes - episode_reward: -178.333 [-245.628, -110.162] - loss: 10.334 - mae: 84.525 - mean_q: -111.771 Interval 7047 (3523000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3467 6 episodes - episode_reward: -190.163 [-254.476, -132.583] - loss: 14.243 - mae: 84.549 - mean_q: -111.779 Interval 7048 (3523500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4610 7 episodes - episode_reward: -172.675 [-270.261, -101.077] - loss: 11.934 - mae: 84.542 - mean_q: -111.778 Interval 7049 (3524000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1138 8 episodes - episode_reward: -135.854 [-200.905, 28.723] - loss: 9.530 - mae: 84.535 - mean_q: -111.814 Interval 7050 (3524500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9040 8 episodes - episode_reward: -184.330 [-241.808, -133.360] - loss: 14.225 - mae: 84.578 - mean_q: -111.810 Interval 7051 (3525000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4801 8 episodes - episode_reward: -151.984 [-209.860, -97.417] - loss: 10.396 - mae: 84.570 - mean_q: -111.813 Interval 7052 (3525500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7441 8 episodes - episode_reward: -174.408 [-278.260, -82.660] - loss: 10.190 - mae: 84.590 - mean_q: -111.830 Interval 7053 (3526000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7552 8 episodes - episode_reward: -164.343 [-270.531, -18.451] - loss: 16.975 - mae: 84.622 - mean_q: -111.819 Interval 7054 (3526500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0532 7 episodes - episode_reward: -218.178 [-274.144, -145.396] - loss: 12.523 - mae: 84.597 - mean_q: -111.805 Interval 7055 (3527000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0333 7 episodes - episode_reward: -223.412 [-393.043, -167.594] - loss: 12.032 - mae: 84.596 - mean_q: -111.810 Interval 7056 (3527500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1552 6 episodes - episode_reward: -168.807 [-269.816, -107.604] - loss: 9.173 - mae: 84.590 - mean_q: -111.830 Interval 7057 (3528000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8246 8 episodes - episode_reward: -181.407 [-228.507, -139.955] - loss: 11.724 - mae: 84.627 - mean_q: -111.835 Interval 7058 (3528500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4306 8 episodes - episode_reward: -156.222 [-207.722, -60.229] - loss: 12.612 - mae: 84.640 - mean_q: -111.829 Interval 7059 (3529000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4091 6 episodes - episode_reward: -192.994 [-263.622, -149.939] - loss: 11.521 - mae: 84.634 - mean_q: -111.808 Interval 7060 (3529500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5587 8 episodes - episode_reward: -165.900 [-230.648, -108.716] - loss: 12.175 - mae: 84.645 - mean_q: -111.819 Interval 7061 (3530000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4076 7 episodes - episode_reward: -233.066 [-486.256, -141.515] - loss: 12.513 - mae: 84.666 - mean_q: -111.799 Interval 7062 (3530500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1333 7 episodes - episode_reward: -156.277 [-193.370, -119.066] - loss: 12.512 - mae: 84.662 - mean_q: -111.790 Interval 7063 (3531000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5538 7 episodes - episode_reward: -180.086 [-277.630, -143.185] - loss: 12.279 - mae: 84.653 - mean_q: -111.810 Interval 7064 (3531500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0034 6 episodes - episode_reward: -161.289 [-192.622, -134.289] - loss: 10.426 - mae: 84.639 - mean_q: -111.825 Interval 7065 (3532000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2636 7 episodes - episode_reward: -167.207 [-254.650, -64.523] - loss: 13.520 - mae: 84.635 - mean_q: -111.804 Interval 7066 (3532500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7623 7 episodes - episode_reward: -190.745 [-226.545, -164.674] - loss: 14.452 - mae: 84.640 - mean_q: -111.776 Interval 7067 (3533000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2141 8 episodes - episode_reward: -213.323 [-337.041, -112.067] - loss: 10.885 - mae: 84.631 - mean_q: -111.753 Interval 7068 (3533500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6317 7 episodes - episode_reward: -116.475 [-250.735, 8.060] - loss: 10.240 - mae: 84.625 - mean_q: -111.768 Interval 7069 (3534000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0729 9 episodes - episode_reward: -164.527 [-244.971, -117.487] - loss: 10.333 - mae: 84.629 - mean_q: -111.774 Interval 7070 (3534500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2146 9 episodes - episode_reward: -177.781 [-245.541, -111.130] - loss: 11.562 - mae: 84.648 - mean_q: -111.792 Interval 7071 (3535000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8459 7 episodes - episode_reward: -205.102 [-328.239, -100.000] - loss: 10.055 - mae: 84.626 - mean_q: -111.788 Interval 7072 (3535500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7823 8 episodes - episode_reward: -179.775 [-221.452, -107.670] - loss: 14.874 - mae: 84.653 - mean_q: -111.791 Interval 7073 (3536000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6331 6 episodes - episode_reward: -165.839 [-197.220, -141.717] - loss: 13.562 - mae: 84.651 - mean_q: -111.761 Interval 7074 (3536500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5653 9 episodes - episode_reward: -120.732 [-247.273, 267.108] - loss: 9.024 - mae: 84.643 - mean_q: -111.756 Interval 7075 (3537000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2944 7 episodes - episode_reward: -160.653 [-239.703, 47.397] - loss: 11.211 - mae: 84.649 - mean_q: -111.770 Interval 7076 (3537500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9011 8 episodes - episode_reward: -179.431 [-259.712, -125.031] - loss: 10.797 - mae: 84.642 - mean_q: -111.781 Interval 7077 (3538000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0717 9 episodes - episode_reward: -176.969 [-258.141, -100.000] - loss: 12.756 - mae: 84.643 - mean_q: -111.780 Interval 7078 (3538500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4178 6 episodes - episode_reward: -182.550 [-242.846, -111.879] - loss: 13.506 - mae: 84.635 - mean_q: -111.759 Interval 7079 (3539000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0435 9 episodes - episode_reward: -180.931 [-264.230, -60.005] - loss: 9.484 - mae: 84.648 - mean_q: -111.769 Interval 7080 (3539500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8693 8 episodes - episode_reward: -179.856 [-261.951, -105.301] - loss: 16.528 - mae: 84.671 - mean_q: -111.754 Interval 7081 (3540000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4807 7 episodes - episode_reward: -182.708 [-244.773, -122.625] - loss: 15.022 - mae: 84.661 - mean_q: -111.729 Interval 7082 (3540500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1451 7 episodes - episode_reward: -218.756 [-305.289, -160.612] - loss: 12.370 - mae: 84.643 - mean_q: -111.718 Interval 7083 (3541000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2294 10 episodes - episode_reward: -158.501 [-244.398, -100.000] - loss: 16.060 - mae: 84.652 - mean_q: -111.676 Interval 7084 (3541500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0038 9 episodes - episode_reward: -169.961 [-233.529, -35.129] - loss: 11.322 - mae: 84.622 - mean_q: -111.655 Interval 7085 (3542000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2352 7 episodes - episode_reward: -149.901 [-197.238, -94.657] - loss: 11.134 - mae: 84.623 - mean_q: -111.653 Interval 7086 (3542500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5387 7 episodes - episode_reward: -190.315 [-244.769, -136.135] - loss: 13.274 - mae: 84.624 - mean_q: -111.636 Interval 7087 (3543000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5213 7 episodes - episode_reward: -176.127 [-217.601, -93.078] - loss: 11.300 - mae: 84.602 - mean_q: -111.614 Interval 7088 (3543500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9192 8 episodes - episode_reward: -184.571 [-277.485, -100.827] - loss: 14.057 - mae: 84.609 - mean_q: -111.603 Interval 7089 (3544000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4374 9 episodes - episode_reward: -133.310 [-200.065, 18.999] - loss: 9.740 - mae: 84.578 - mean_q: -111.577 Interval 7090 (3544500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8654 7 episodes - episode_reward: -206.461 [-352.480, -107.395] - loss: 9.073 - mae: 84.553 - mean_q: -111.565 Interval 7091 (3545000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0922 9 episodes - episode_reward: -172.509 [-237.551, -72.637] - loss: 12.394 - mae: 84.560 - mean_q: -111.579 Interval 7092 (3545500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9376 8 episodes - episode_reward: -182.925 [-299.013, -100.000] - loss: 10.981 - mae: 84.552 - mean_q: -111.581 Interval 7093 (3546000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2219 9 episodes - episode_reward: -182.995 [-251.008, -100.000] - loss: 11.363 - mae: 84.559 - mean_q: -111.596 Interval 7094 (3546500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7973 7 episodes - episode_reward: -200.043 [-237.617, -154.798] - loss: 12.234 - mae: 84.567 - mean_q: -111.571 Interval 7095 (3547000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5132 8 episodes - episode_reward: -217.450 [-386.734, -126.772] - loss: 11.533 - mae: 84.555 - mean_q: -111.595 Interval 7096 (3547500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8635 6 episodes - episode_reward: -228.696 [-349.446, -121.692] - loss: 11.415 - mae: 84.569 - mean_q: -111.593 Interval 7097 (3548000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0186 9 episodes - episode_reward: -169.217 [-249.867, -100.000] - loss: 10.686 - mae: 84.567 - mean_q: -111.586 Interval 7098 (3548500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1334 8 episodes - episode_reward: -185.804 [-235.275, -123.579] - loss: 14.086 - mae: 84.568 - mean_q: -111.588 Interval 7099 (3549000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6680 7 episodes - episode_reward: -211.961 [-359.243, -93.136] - loss: 17.937 - mae: 84.562 - mean_q: -111.569 Interval 7100 (3549500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9543 6 episodes - episode_reward: -141.363 [-217.903, 57.633] - loss: 13.574 - mae: 84.520 - mean_q: -111.546 Interval 7101 (3550000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9994 8 episodes - episode_reward: -192.169 [-262.263, -125.371] - loss: 13.771 - mae: 84.496 - mean_q: -111.518 Interval 7102 (3550500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1291 8 episodes - episode_reward: -206.853 [-321.660, -125.485] - loss: 12.769 - mae: 84.458 - mean_q: -111.493 Interval 7103 (3551000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8842 8 episodes - episode_reward: -179.843 [-234.998, -124.431] - loss: 13.116 - mae: 84.428 - mean_q: -111.463 Interval 7104 (3551500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9854 8 episodes - episode_reward: -186.878 [-235.279, -136.000] - loss: 6.665 - mae: 84.359 - mean_q: -111.483 Interval 7105 (3552000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3494 8 episodes - episode_reward: -137.778 [-193.414, 20.068] - loss: 13.968 - mae: 84.364 - mean_q: -111.497 Interval 7106 (3552500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8992 9 episodes - episode_reward: -167.192 [-221.634, -100.000] - loss: 12.092 - mae: 84.319 - mean_q: -111.478 Interval 7107 (3553000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7058 8 episodes - episode_reward: -212.833 [-631.327, -100.000] - loss: 10.789 - mae: 84.297 - mean_q: -111.459 Interval 7108 (3553500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7330 4 episodes - episode_reward: -615.902 [-804.298, -100.195] - loss: 11.427 - mae: 84.287 - mean_q: -111.430 Interval 7109 (3554000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6653 6 episodes - episode_reward: -299.441 [-777.278, -100.000] - loss: 10.606 - mae: 84.279 - mean_q: -111.439 Interval 7110 (3554500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6534 7 episodes - episode_reward: -193.834 [-262.706, -137.301] - loss: 12.351 - mae: 84.298 - mean_q: -111.473 Interval 7111 (3555000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6942 7 episodes - episode_reward: -202.927 [-244.352, -146.049] - loss: 10.352 - mae: 84.292 - mean_q: -111.467 Interval 7112 (3555500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3116 7 episodes - episode_reward: -166.633 [-213.470, -101.726] - loss: 10.868 - mae: 84.300 - mean_q: -111.468 Interval 7113 (3556000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0299 6 episodes - episode_reward: -163.241 [-195.813, -136.284] - loss: 12.630 - mae: 84.321 - mean_q: -111.469 Interval 7114 (3556500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7523 7 episodes - episode_reward: -207.080 [-311.988, -150.760] - loss: 9.549 - mae: 84.316 - mean_q: -111.473 Interval 7115 (3557000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0311 7 episodes - episode_reward: -196.778 [-292.637, -109.743] - loss: 13.876 - mae: 84.362 - mean_q: -111.478 Interval 7116 (3557500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3310 8 episodes - episode_reward: -154.392 [-228.459, -91.531] - loss: 11.579 - mae: 84.355 - mean_q: -111.456 Interval 7117 (3558000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5720 7 episodes - episode_reward: -186.197 [-265.577, -120.351] - loss: 8.640 - mae: 84.346 - mean_q: -111.437 Interval 7118 (3558500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5898 8 episodes - episode_reward: -160.618 [-237.702, -18.600] - loss: 9.911 - mae: 84.355 - mean_q: -111.437 Interval 7119 (3559000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6081 7 episodes - episode_reward: -177.240 [-272.071, -131.725] - loss: 13.555 - mae: 84.360 - mean_q: -111.423 Interval 7120 (3559500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1598 8 episodes - episode_reward: -196.726 [-225.173, -162.240] - loss: 13.021 - mae: 84.357 - mean_q: -111.404 Interval 7121 (3560000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5349 9 episodes - episode_reward: -203.860 [-273.819, -100.000] - loss: 9.507 - mae: 84.351 - mean_q: -111.388 Interval 7122 (3560500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7669 8 episodes - episode_reward: -165.284 [-284.513, 10.670] - loss: 11.211 - mae: 84.347 - mean_q: -111.401 Interval 7123 (3561000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6233 7 episodes - episode_reward: -189.544 [-225.399, -131.472] - loss: 12.490 - mae: 84.361 - mean_q: -111.360 Interval 7124 (3561500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3507 7 episodes - episode_reward: -171.190 [-232.832, -43.723] - loss: 10.524 - mae: 84.342 - mean_q: -111.352 Interval 7125 (3562000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0087 8 episodes - episode_reward: -186.887 [-291.404, -100.000] - loss: 13.080 - mae: 84.355 - mean_q: -111.339 Interval 7126 (3562500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2689 7 episodes - episode_reward: -163.800 [-270.703, -95.622] - loss: 15.833 - mae: 84.352 - mean_q: -111.304 Interval 7127 (3563000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3034 9 episodes - episode_reward: -187.707 [-244.325, -131.529] - loss: 11.649 - mae: 84.337 - mean_q: -111.283 Interval 7128 (3563500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9119 8 episodes - episode_reward: -179.413 [-249.105, -109.023] - loss: 10.895 - mae: 84.311 - mean_q: -111.264 Interval 7129 (3564000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5779 6 episodes - episode_reward: -203.262 [-230.974, -160.352] - loss: 10.052 - mae: 84.303 - mean_q: -111.284 Interval 7130 (3564500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0630 9 episodes - episode_reward: -177.077 [-242.590, -100.000] - loss: 10.135 - mae: 84.302 - mean_q: -111.282 Interval 7131 (3565000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9022 8 episodes - episode_reward: -180.655 [-245.358, -100.000] - loss: 8.102 - mae: 84.289 - mean_q: -111.280 Interval 7132 (3565500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4173 10 episodes - episode_reward: -174.815 [-253.196, -113.636] - loss: 12.302 - mae: 84.283 - mean_q: -111.270 Interval 7133 (3566000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6792 10 episodes - episode_reward: -130.086 [-185.224, -18.497] - loss: 10.481 - mae: 84.267 - mean_q: -111.252 Interval 7134 (3566500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9515 9 episodes - episode_reward: -166.491 [-302.332, -31.940] - loss: 14.374 - mae: 84.284 - mean_q: -111.223 Interval 7135 (3567000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0127 9 episodes - episode_reward: -164.257 [-214.148, -100.000] - loss: 8.904 - mae: 84.265 - mean_q: -111.201 Interval 7136 (3567500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0451 5 episodes - episode_reward: -194.959 [-230.960, -144.425] - loss: 13.345 - mae: 84.279 - mean_q: -111.170 Interval 7137 (3568000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2692 10 episodes - episode_reward: -174.306 [-283.235, -100.000] - loss: 13.715 - mae: 84.268 - mean_q: -111.138 Interval 7138 (3568500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5174 7 episodes - episode_reward: -167.346 [-245.047, -46.906] - loss: 13.881 - mae: 84.259 - mean_q: -111.120 Interval 7139 (3569000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3667 8 episodes - episode_reward: -157.011 [-186.747, -108.185] - loss: 10.944 - mae: 84.222 - mean_q: -111.084 Interval 7140 (3569500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8235 7 episodes - episode_reward: -187.565 [-276.027, -93.313] - loss: 14.704 - mae: 84.218 - mean_q: -111.054 Interval 7141 (3570000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8113 10 episodes - episode_reward: -147.679 [-242.912, -33.785] - loss: 11.067 - mae: 84.183 - mean_q: -111.035 Interval 7142 (3570500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9280 8 episodes - episode_reward: -180.848 [-209.055, -147.776] - loss: 8.123 - mae: 84.116 - mean_q: -111.024 Interval 7143 (3571000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3988 7 episodes - episode_reward: -159.915 [-235.828, -96.527] - loss: 11.287 - mae: 84.091 - mean_q: -111.016 Interval 7144 (3571500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7588 8 episodes - episode_reward: -167.754 [-226.374, -100.000] - loss: 14.795 - mae: 84.063 - mean_q: -110.981 Interval 7145 (3572000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2461 6 episodes - episode_reward: -191.347 [-288.603, -131.865] - loss: 8.641 - mae: 83.992 - mean_q: -110.947 Interval 7146 (3572500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.0844 10 episodes - episode_reward: -160.130 [-270.663, -72.319] - loss: 9.397 - mae: 83.958 - mean_q: -110.959 Interval 7147 (3573000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0328 9 episodes - episode_reward: -169.673 [-233.270, -59.955] - loss: 11.995 - mae: 83.945 - mean_q: -110.924 Interval 7148 (3573500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5297 7 episodes - episode_reward: -183.819 [-283.098, -133.095] - loss: 12.189 - mae: 83.925 - mean_q: -110.902 Interval 7149 (3574000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5534 7 episodes - episode_reward: -189.153 [-362.453, -11.436] - loss: 12.913 - mae: 83.902 - mean_q: -110.862 Interval 7150 (3574500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0189 8 episodes - episode_reward: -181.764 [-238.302, -115.299] - loss: 12.231 - mae: 83.860 - mean_q: -110.842 Interval 7151 (3575000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1833 6 episodes - episode_reward: -183.535 [-298.704, -103.086] - loss: 10.665 - mae: 83.835 - mean_q: -110.851 Interval 7152 (3575500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7385 8 episodes - episode_reward: -174.615 [-273.905, -100.000] - loss: 11.686 - mae: 83.808 - mean_q: -110.853 Interval 7153 (3576000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5642 8 episodes - episode_reward: -156.636 [-307.163, 4.974] - loss: 13.254 - mae: 83.801 - mean_q: -110.825 Interval 7154 (3576500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4676 7 episodes - episode_reward: -180.802 [-291.179, 85.168] - loss: 12.217 - mae: 83.769 - mean_q: -110.797 Interval 7155 (3577000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.2565 6 episodes - episode_reward: -173.903 [-223.771, -97.339] - loss: 14.642 - mae: 83.755 - mean_q: -110.757 Interval 7156 (3577500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4851 9 episodes - episode_reward: -197.396 [-239.967, -160.146] - loss: 11.339 - mae: 83.726 - mean_q: -110.726 Interval 7157 (3578000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4855 11 episodes - episode_reward: -162.752 [-229.724, -100.000] - loss: 9.264 - mae: 83.693 - mean_q: -110.720 Interval 7158 (3578500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7270 8 episodes - episode_reward: -167.790 [-234.431, -100.127] - loss: 11.818 - mae: 83.692 - mean_q: -110.715 Interval 7159 (3579000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0131 8 episodes - episode_reward: -190.431 [-285.992, -100.000] - loss: 12.132 - mae: 83.661 - mean_q: -110.670 Interval 7160 (3579500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7783 6 episodes - episode_reward: -151.471 [-270.421, 16.242] - loss: 13.953 - mae: 83.630 - mean_q: -110.653 Interval 7161 (3580000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1592 7 episodes - episode_reward: -149.592 [-224.054, 62.450] - loss: 12.547 - mae: 83.604 - mean_q: -110.630 Interval 7162 (3580500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7096 8 episodes - episode_reward: -159.427 [-193.501, -111.847] - loss: 13.993 - mae: 83.587 - mean_q: -110.599 Interval 7163 (3581000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6328 8 episodes - episode_reward: -178.206 [-245.799, -117.471] - loss: 11.523 - mae: 83.578 - mean_q: -110.562 Interval 7164 (3581500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5918 8 episodes - episode_reward: -159.885 [-238.508, -106.193] - loss: 11.904 - mae: 83.559 - mean_q: -110.541 Interval 7165 (3582000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5110 7 episodes - episode_reward: -188.784 [-264.620, -98.485] - loss: 12.314 - mae: 83.541 - mean_q: -110.518 Interval 7166 (3582500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4935 7 episodes - episode_reward: -165.146 [-248.513, -97.809] - loss: 14.216 - mae: 83.515 - mean_q: -110.494 Interval 7167 (3583000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2704 9 episodes - episode_reward: -180.493 [-210.695, -136.395] - loss: 11.271 - mae: 83.495 - mean_q: -110.474 Interval 7168 (3583500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4140 6 episodes - episode_reward: -209.164 [-268.693, -144.349] - loss: 8.917 - mae: 83.466 - mean_q: -110.467 Interval 7169 (3584000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1537 9 episodes - episode_reward: -171.833 [-226.234, -118.239] - loss: 11.831 - mae: 83.488 - mean_q: -110.467 Interval 7170 (3584500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4454 7 episodes - episode_reward: -178.304 [-250.996, -122.463] - loss: 14.920 - mae: 83.479 - mean_q: -110.439 Interval 7171 (3585000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3037 7 episodes - episode_reward: -167.632 [-246.890, -55.401] - loss: 10.800 - mae: 83.435 - mean_q: -110.419 Interval 7172 (3585500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7759 7 episodes - episode_reward: -188.826 [-222.648, -138.429] - loss: 12.534 - mae: 83.437 - mean_q: -110.403 Interval 7173 (3586000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9582 8 episodes - episode_reward: -186.035 [-223.287, -135.747] - loss: 11.301 - mae: 83.414 - mean_q: -110.375 Interval 7174 (3586500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6844 9 episodes - episode_reward: -194.207 [-238.396, -100.000] - loss: 8.432 - mae: 83.384 - mean_q: -110.383 Interval 7175 (3587000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7704 10 episodes - episode_reward: -200.540 [-292.450, -100.000] - loss: 11.717 - mae: 83.382 - mean_q: -110.388 Interval 7176 (3587500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0912 6 episodes - episode_reward: -175.909 [-203.562, -111.173] - loss: 10.488 - mae: 83.352 - mean_q: -110.390 Interval 7177 (3588000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9357 7 episodes - episode_reward: -203.757 [-316.795, -114.131] - loss: 10.535 - mae: 83.346 - mean_q: -110.398 Interval 7178 (3588500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4496 7 episodes - episode_reward: -179.083 [-244.187, -124.262] - loss: 9.693 - mae: 83.355 - mean_q: -110.396 Interval 7179 (3589000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7991 7 episodes - episode_reward: -194.999 [-230.980, -163.965] - loss: 10.786 - mae: 83.339 - mean_q: -110.378 Interval 7180 (3589500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1007 9 episodes - episode_reward: -177.610 [-346.481, -100.543] - loss: 10.838 - mae: 83.340 - mean_q: -110.385 Interval 7181 (3590000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6211 7 episodes - episode_reward: -185.638 [-238.802, -144.439] - loss: 13.080 - mae: 83.345 - mean_q: -110.386 Interval 7182 (3590500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -4.4369 15 episodes - episode_reward: -148.176 [-268.037, -100.000] - loss: 8.717 - mae: 83.312 - mean_q: -110.411 Interval 7183 (3591000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2764 7 episodes - episode_reward: -152.401 [-196.692, -62.174] - loss: 9.733 - mae: 83.315 - mean_q: -110.425 Interval 7184 (3591500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3084 8 episodes - episode_reward: -330.711 [-906.286, -142.899] - loss: 10.119 - mae: 83.310 - mean_q: -110.438 Interval 7185 (3592000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0565 6 episodes - episode_reward: -170.626 [-263.959, -85.714] - loss: 11.228 - mae: 83.323 - mean_q: -110.440 Interval 7186 (3592500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9046 9 episodes - episode_reward: -168.498 [-231.979, -8.570] - loss: 10.554 - mae: 83.323 - mean_q: -110.470 Interval 7187 (3593000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9022 8 episodes - episode_reward: -179.384 [-225.315, -150.604] - loss: 9.852 - mae: 83.360 - mean_q: -110.493 Interval 7188 (3593500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8191 8 episodes - episode_reward: -176.957 [-212.717, -132.257] - loss: 11.194 - mae: 83.382 - mean_q: -110.493 Interval 7189 (3594000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.3439 11 episodes - episode_reward: -194.674 [-297.772, -100.000] - loss: 11.760 - mae: 83.386 - mean_q: -110.492 Interval 7190 (3594500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1170 5 episodes - episode_reward: -215.646 [-272.287, -168.959] - loss: 8.604 - mae: 83.385 - mean_q: -110.516 Interval 7191 (3595000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.9984 8 episodes - episode_reward: -186.301 [-257.370, -136.167] - loss: 10.703 - mae: 83.412 - mean_q: -110.527 Interval 7192 (3595500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3126 9 episodes - episode_reward: -190.694 [-254.173, -100.000] - loss: 9.997 - mae: 83.433 - mean_q: -110.515 Interval 7193 (3596000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6407 6 episodes - episode_reward: -221.661 [-346.403, -151.952] - loss: 13.536 - mae: 83.450 - mean_q: -110.517 Interval 7194 (3596500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0123 5 episodes - episode_reward: -182.605 [-223.777, -143.153] - loss: 10.496 - mae: 83.437 - mean_q: -110.505 Interval 7195 (3597000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5509 9 episodes - episode_reward: -207.688 [-251.026, -106.606] - loss: 11.329 - mae: 83.434 - mean_q: -110.484 Interval 7196 (3597500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9429 8 episodes - episode_reward: -176.496 [-244.239, -111.262] - loss: 11.768 - mae: 83.438 - mean_q: -110.476 Interval 7197 (3598000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7973 8 episodes - episode_reward: -177.201 [-246.774, -125.977] - loss: 12.355 - mae: 83.427 - mean_q: -110.466 Interval 7198 (3598500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7227 7 episodes - episode_reward: -188.225 [-284.275, -107.826] - loss: 15.148 - mae: 83.431 - mean_q: -110.458 Interval 7199 (3599000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5369 7 episodes - episode_reward: -186.797 [-294.426, -58.370] - loss: 10.644 - mae: 83.409 - mean_q: -110.446 Interval 7200 (3599500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8995 7 episodes - episode_reward: -202.062 [-229.472, -166.939] - loss: 11.074 - mae: 83.403 - mean_q: -110.455 Interval 7201 (3600000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1046 10 episodes - episode_reward: -155.677 [-244.658, -100.000] - loss: 14.297 - mae: 83.415 - mean_q: -110.444 Interval 7202 (3600500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9183 9 episodes - episode_reward: -165.206 [-215.715, -120.146] - loss: 13.160 - mae: 83.403 - mean_q: -110.425 Interval 7203 (3601000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1646 9 episodes - episode_reward: -176.857 [-246.941, -138.338] - loss: 9.604 - mae: 83.382 - mean_q: -110.422 Interval 7204 (3601500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8280 9 episodes - episode_reward: -151.111 [-200.251, -121.105] - loss: 12.742 - mae: 83.396 - mean_q: -110.430 Interval 7205 (3602000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0728 8 episodes - episode_reward: -201.913 [-258.037, -153.448] - loss: 12.944 - mae: 83.384 - mean_q: -110.428 Interval 7206 (3602500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4534 8 episodes - episode_reward: -213.386 [-293.553, -151.741] - loss: 11.461 - mae: 83.382 - mean_q: -110.426 Interval 7207 (3603000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6459 7 episodes - episode_reward: -174.134 [-269.063, -122.478] - loss: 8.722 - mae: 83.337 - mean_q: -110.438 Interval 7208 (3603500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8166 8 episodes - episode_reward: -180.643 [-232.914, -118.355] - loss: 11.563 - mae: 83.321 - mean_q: -110.472 Interval 7209 (3604000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -5.3987 7 episodes - episode_reward: -357.417 [-726.744, -207.057] - loss: 12.554 - mae: 83.292 - mean_q: -110.458 Interval 7210 (3604500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7218 7 episodes - episode_reward: -305.789 [-567.153, -111.515] - loss: 9.481 - mae: 83.276 - mean_q: -110.447 Interval 7211 (3605000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0054 4 episodes - episode_reward: -607.470 [-1509.908, -206.725] - loss: 11.536 - mae: 83.298 - mean_q: -110.464 Interval 7212 (3605500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0025 9 episodes - episode_reward: -172.492 [-242.037, -98.669] - loss: 11.134 - mae: 83.308 - mean_q: -110.495 Interval 7213 (3606000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4947 9 episodes - episode_reward: -197.711 [-288.052, -137.743] - loss: 11.603 - mae: 83.355 - mean_q: -110.509 Interval 7214 (3606500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6146 9 episodes - episode_reward: -190.921 [-361.360, -116.945] - loss: 11.062 - mae: 83.362 - mean_q: -110.527 Interval 7215 (3607000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8189 9 episodes - episode_reward: -219.345 [-393.466, -174.070] - loss: 10.108 - mae: 83.364 - mean_q: -110.551 Interval 7216 (3607500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5138 6 episodes - episode_reward: -372.654 [-1026.124, -159.732] - loss: 9.593 - mae: 83.391 - mean_q: -110.607 Interval 7217 (3608000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.9407 7 episodes - episode_reward: -415.246 [-737.574, -99.536] - loss: 11.205 - mae: 83.447 - mean_q: -110.650 Interval 7218 (3608500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4434 9 episodes - episode_reward: -204.081 [-315.075, -138.784] - loss: 9.452 - mae: 83.492 - mean_q: -110.680 Interval 7219 (3609000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8165 7 episodes - episode_reward: -195.165 [-301.913, -156.308] - loss: 12.304 - mae: 83.555 - mean_q: -110.721 Interval 7220 (3609500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5584 8 episodes - episode_reward: -222.550 [-398.383, -142.753] - loss: 10.596 - mae: 83.590 - mean_q: -110.752 Interval 7221 (3610000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4996 7 episodes - episode_reward: -176.124 [-268.771, -16.876] - loss: 12.610 - mae: 83.643 - mean_q: -110.780 Interval 7222 (3610500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8781 7 episodes - episode_reward: -215.395 [-335.476, -121.695] - loss: 9.331 - mae: 83.657 - mean_q: -110.804 Interval 7223 (3611000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7122 9 episodes - episode_reward: -199.975 [-278.677, -108.436] - loss: 9.380 - mae: 83.706 - mean_q: -110.862 Interval 7224 (3611500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8537 8 episodes - episode_reward: -178.998 [-215.575, -142.723] - loss: 10.914 - mae: 83.764 - mean_q: -110.910 Interval 7225 (3612000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5914 6 episodes - episode_reward: -194.444 [-294.110, -144.899] - loss: 12.673 - mae: 83.818 - mean_q: -110.944 Interval 7226 (3612500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4118 9 episodes - episode_reward: -201.769 [-292.018, -124.435] - loss: 9.505 - mae: 83.851 - mean_q: -110.990 Interval 7227 (3613000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.5213 10 episodes - episode_reward: -179.482 [-210.588, -118.956] - loss: 11.538 - mae: 83.905 - mean_q: -111.059 Interval 7228 (3613500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5129 8 episodes - episode_reward: -153.991 [-225.306, -100.000] - loss: 10.331 - mae: 83.943 - mean_q: -111.100 Interval 7229 (3614000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9081 8 episodes - episode_reward: -183.262 [-263.338, -100.000] - loss: 9.380 - mae: 83.968 - mean_q: -111.164 Interval 7230 (3614500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4483 9 episodes - episode_reward: -137.498 [-182.075, 5.582] - loss: 10.035 - mae: 84.010 - mean_q: -111.230 Interval 7231 (3615000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6839 8 episodes - episode_reward: -165.933 [-259.642, -100.000] - loss: 11.070 - mae: 84.057 - mean_q: -111.282 Interval 7232 (3615500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2615 8 episodes - episode_reward: -200.013 [-308.654, -110.935] - loss: 9.253 - mae: 84.113 - mean_q: -111.311 Interval 7233 (3616000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2069 7 episodes - episode_reward: -165.205 [-239.861, -6.826] - loss: 9.931 - mae: 84.149 - mean_q: -111.373 Interval 7234 (3616500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1339 8 episodes - episode_reward: -197.018 [-296.146, -142.391] - loss: 15.487 - mae: 84.211 - mean_q: -111.389 Interval 7235 (3617000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6741 7 episodes - episode_reward: -178.368 [-214.082, -97.378] - loss: 11.154 - mae: 84.228 - mean_q: -111.421 Interval 7236 (3617500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6503 8 episodes - episode_reward: -172.083 [-241.752, -84.094] - loss: 10.408 - mae: 84.251 - mean_q: -111.465 Interval 7237 (3618000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5717 7 episodes - episode_reward: -173.908 [-248.051, -88.434] - loss: 11.704 - mae: 84.292 - mean_q: -111.492 Interval 7238 (3618500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8721 7 episodes - episode_reward: -219.072 [-314.231, -127.065] - loss: 12.236 - mae: 84.340 - mean_q: -111.537 Interval 7239 (3619000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5806 8 episodes - episode_reward: -161.542 [-205.109, -60.652] - loss: 12.171 - mae: 84.379 - mean_q: -111.588 Interval 7240 (3619500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9972 8 episodes - episode_reward: -185.139 [-287.227, -123.995] - loss: 10.748 - mae: 84.393 - mean_q: -111.641 Interval 7241 (3620000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9978 9 episodes - episode_reward: -168.331 [-217.474, -124.388] - loss: 12.873 - mae: 84.419 - mean_q: -111.666 Interval 7242 (3620500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7101 7 episodes - episode_reward: -188.882 [-243.135, -134.291] - loss: 11.403 - mae: 84.459 - mean_q: -111.709 Interval 7243 (3621000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8419 8 episodes - episode_reward: -166.680 [-319.212, -75.334] - loss: 15.254 - mae: 84.505 - mean_q: -111.748 Interval 7244 (3621500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2605 7 episodes - episode_reward: -179.047 [-301.786, -108.499] - loss: 11.051 - mae: 84.526 - mean_q: -111.767 Interval 7245 (3622000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.9047 7 episodes - episode_reward: -136.484 [-241.124, 42.125] - loss: 12.421 - mae: 84.560 - mean_q: -111.792 Interval 7246 (3622500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6814 7 episodes - episode_reward: -187.888 [-256.358, -31.473] - loss: 8.860 - mae: 84.566 - mean_q: -111.845 Interval 7247 (3623000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5649 7 episodes - episode_reward: -177.194 [-274.300, -115.344] - loss: 13.139 - mae: 84.605 - mean_q: -111.911 Interval 7248 (3623500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2563 6 episodes - episode_reward: -187.975 [-257.205, -18.918] - loss: 9.232 - mae: 84.624 - mean_q: -111.961 Interval 7249 (3624000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9863 9 episodes - episode_reward: -167.799 [-264.120, -100.000] - loss: 12.553 - mae: 84.676 - mean_q: -111.998 Interval 7250 (3624500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8543 8 episodes - episode_reward: -177.536 [-327.153, -46.971] - loss: 13.443 - mae: 84.696 - mean_q: -111.992 Interval 7251 (3625000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8113 8 episodes - episode_reward: -182.770 [-300.673, -142.460] - loss: 13.913 - mae: 84.722 - mean_q: -111.997 Interval 7252 (3625500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2856 9 episodes - episode_reward: -177.641 [-253.923, -100.000] - loss: 8.476 - mae: 84.734 - mean_q: -112.036 Interval 7253 (3626000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7031 7 episodes - episode_reward: -182.080 [-220.582, -139.712] - loss: 10.843 - mae: 84.750 - mean_q: -112.120 Interval 7254 (3626500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5746 7 episodes - episode_reward: -195.752 [-287.017, -106.576] - loss: 11.610 - mae: 84.783 - mean_q: -112.157 Interval 7255 (3627000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9290 7 episodes - episode_reward: -194.056 [-254.336, -126.108] - loss: 9.993 - mae: 84.794 - mean_q: -112.194 Interval 7256 (3627500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5140 8 episodes - episode_reward: -176.501 [-287.117, -87.555] - loss: 10.746 - mae: 84.823 - mean_q: -112.243 Interval 7257 (3628000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1359 6 episodes - episode_reward: -176.152 [-227.906, -121.526] - loss: 11.588 - mae: 84.851 - mean_q: -112.270 Interval 7258 (3628500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5001 9 episodes - episode_reward: -137.910 [-274.314, -27.024] - loss: 9.369 - mae: 84.881 - mean_q: -112.294 Interval 7259 (3629000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4638 7 episodes - episode_reward: -171.245 [-218.593, -132.812] - loss: 11.152 - mae: 84.894 - mean_q: -112.313 Interval 7260 (3629500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9389 9 episodes - episode_reward: -163.629 [-214.055, -100.000] - loss: 9.382 - mae: 84.904 - mean_q: -112.353 Interval 7261 (3630000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0949 8 episodes - episode_reward: -198.952 [-289.580, -145.329] - loss: 9.125 - mae: 84.932 - mean_q: -112.414 Interval 7262 (3630500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5533 7 episodes - episode_reward: -177.614 [-249.872, -132.125] - loss: 8.922 - mae: 84.946 - mean_q: -112.485 Interval 7263 (3631000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9403 8 episodes - episode_reward: -181.240 [-254.401, -122.169] - loss: 10.100 - mae: 84.991 - mean_q: -112.528 Interval 7264 (3631500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7789 10 episodes - episode_reward: -192.543 [-369.648, -134.233] - loss: 10.739 - mae: 85.019 - mean_q: -112.564 Interval 7265 (3632000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0598 9 episodes - episode_reward: -163.673 [-241.757, -100.000] - loss: 9.342 - mae: 85.039 - mean_q: -112.599 Interval 7266 (3632500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2516 9 episodes - episode_reward: -183.790 [-260.104, -126.587] - loss: 10.123 - mae: 85.090 - mean_q: -112.637 Interval 7267 (3633000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0039 7 episodes - episode_reward: -147.121 [-227.041, -91.075] - loss: 9.974 - mae: 85.110 - mean_q: -112.675 Interval 7268 (3633500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0334 8 episodes - episode_reward: -186.544 [-270.298, -93.608] - loss: 10.775 - mae: 85.165 - mean_q: -112.710 Interval 7269 (3634000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1570 8 episodes - episode_reward: -200.455 [-258.807, -137.130] - loss: 8.175 - mae: 85.191 - mean_q: -112.774 Interval 7270 (3634500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2719 9 episodes - episode_reward: -180.802 [-250.061, -110.503] - loss: 9.555 - mae: 85.249 - mean_q: -112.832 Interval 7271 (3635000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3092 8 episodes - episode_reward: -210.184 [-314.234, -157.337] - loss: 12.903 - mae: 85.294 - mean_q: -112.872 Interval 7272 (3635500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.7485 7 episodes - episode_reward: -115.602 [-185.022, 6.965] - loss: 10.083 - mae: 85.325 - mean_q: -112.933 Interval 7273 (3636000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0713 7 episodes - episode_reward: -155.597 [-217.847, 13.426] - loss: 11.727 - mae: 85.359 - mean_q: -112.963 Interval 7274 (3636500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2665 7 episodes - episode_reward: -159.265 [-232.993, -97.974] - loss: 11.194 - mae: 85.378 - mean_q: -113.014 Interval 7275 (3637000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4208 8 episodes - episode_reward: -153.392 [-209.624, -110.593] - loss: 12.371 - mae: 85.417 - mean_q: -113.031 Interval 7276 (3637500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8657 8 episodes - episode_reward: -178.222 [-242.059, -26.695] - loss: 12.561 - mae: 85.453 - mean_q: -113.043 Interval 7277 (3638000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1568 8 episodes - episode_reward: -199.103 [-264.553, -100.000] - loss: 11.072 - mae: 85.472 - mean_q: -113.056 Interval 7278 (3638500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2709 6 episodes - episode_reward: -175.199 [-256.328, -100.000] - loss: 9.196 - mae: 85.497 - mean_q: -113.093 Interval 7279 (3639000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.4897 10 episodes - episode_reward: -175.549 [-223.636, -100.000] - loss: 9.852 - mae: 85.513 - mean_q: -113.142 Interval 7280 (3639500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0247 9 episodes - episode_reward: -176.405 [-236.773, -100.000] - loss: 9.870 - mae: 85.540 - mean_q: -113.178 Interval 7281 (3640000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2858 6 episodes - episode_reward: -190.548 [-236.049, -154.897] - loss: 11.694 - mae: 85.574 - mean_q: -113.197 Interval 7282 (3640500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4302 5 episodes - episode_reward: -232.568 [-266.454, -211.815] - loss: 11.878 - mae: 85.589 - mean_q: -113.208 Interval 7283 (3641000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6148 9 episodes - episode_reward: -140.242 [-216.191, -50.973] - loss: 11.915 - mae: 85.598 - mean_q: -113.201 Interval 7284 (3641500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7895 9 episodes - episode_reward: -156.971 [-261.916, 58.794] - loss: 11.524 - mae: 85.585 - mean_q: -113.199 Interval 7285 (3642000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6262 9 episodes - episode_reward: -151.694 [-212.061, -14.025] - loss: 8.528 - mae: 85.592 - mean_q: -113.194 Interval 7286 (3642500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0220 11 episodes - episode_reward: -140.021 [-211.391, -58.737] - loss: 13.309 - mae: 85.637 - mean_q: -113.191 Interval 7287 (3643000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8801 7 episodes - episode_reward: -200.341 [-289.285, -157.024] - loss: 11.484 - mae: 85.653 - mean_q: -113.174 Interval 7288 (3643500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4525 9 episodes - episode_reward: -180.029 [-251.217, -115.082] - loss: 10.084 - mae: 85.647 - mean_q: -113.181 Interval 7289 (3644000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0182 9 episodes - episode_reward: -184.299 [-268.227, -127.413] - loss: 10.351 - mae: 85.650 - mean_q: -113.205 Interval 7290 (3644500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5532 6 episodes - episode_reward: -197.007 [-278.192, -126.243] - loss: 11.787 - mae: 85.669 - mean_q: -113.195 Interval 7291 (3645000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1853 6 episodes - episode_reward: -195.892 [-240.246, -152.214] - loss: 12.212 - mae: 85.679 - mean_q: -113.193 Interval 7292 (3645500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2100 7 episodes - episode_reward: -157.189 [-197.161, -100.000] - loss: 10.117 - mae: 85.664 - mean_q: -113.176 Interval 7293 (3646000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7456 7 episodes - episode_reward: -196.589 [-332.636, -135.486] - loss: 13.845 - mae: 85.678 - mean_q: -113.179 Interval 7294 (3646500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8825 8 episodes - episode_reward: -179.631 [-220.281, -142.613] - loss: 9.875 - mae: 85.666 - mean_q: -113.179 Interval 7295 (3647000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1049 7 episodes - episode_reward: -149.058 [-198.964, -107.158] - loss: 9.570 - mae: 85.665 - mean_q: -113.183 Interval 7296 (3647500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3287 7 episodes - episode_reward: -155.797 [-196.214, -117.941] - loss: 13.644 - mae: 85.697 - mean_q: -113.176 Interval 7297 (3648000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2279 7 episodes - episode_reward: -174.364 [-237.005, -59.170] - loss: 8.335 - mae: 85.661 - mean_q: -113.157 Interval 7298 (3648500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5579 7 episodes - episode_reward: -172.939 [-200.241, -100.000] - loss: 9.250 - mae: 85.665 - mean_q: -113.164 Interval 7299 (3649000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6477 8 episodes - episode_reward: -168.105 [-250.469, -103.995] - loss: 9.236 - mae: 85.663 - mean_q: -113.167 Interval 7300 (3649500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4223 10 episodes - episode_reward: -167.162 [-241.124, -100.000] - loss: 9.358 - mae: 85.672 - mean_q: -113.145 Interval 7301 (3650000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3048 7 episodes - episode_reward: -166.365 [-260.081, -5.197] - loss: 10.969 - mae: 85.670 - mean_q: -113.149 Interval 7302 (3650500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9363 9 episodes - episode_reward: -170.150 [-258.903, -86.260] - loss: 13.441 - mae: 85.689 - mean_q: -113.115 Interval 7303 (3651000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6096 7 episodes - episode_reward: -179.596 [-219.499, -149.302] - loss: 16.105 - mae: 85.692 - mean_q: -113.060 Interval 7304 (3651500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9597 6 episodes - episode_reward: -169.019 [-288.974, 20.317] - loss: 9.473 - mae: 85.660 - mean_q: -113.023 Interval 7305 (3652000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9228 7 episodes - episode_reward: -202.876 [-282.746, -155.363] - loss: 13.739 - mae: 85.673 - mean_q: -113.003 Interval 7306 (3652500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3824 10 episodes - episode_reward: -172.751 [-268.162, -100.000] - loss: 11.769 - mae: 85.649 - mean_q: -112.979 Interval 7307 (3653000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9412 9 episodes - episode_reward: -163.353 [-200.484, -127.249] - loss: 12.312 - mae: 85.643 - mean_q: -112.963 Interval 7308 (3653500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7655 8 episodes - episode_reward: -167.396 [-289.374, -105.180] - loss: 8.283 - mae: 85.609 - mean_q: -112.935 Interval 7309 (3654000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4890 11 episodes - episode_reward: -163.288 [-244.925, -100.000] - loss: 9.303 - mae: 85.591 - mean_q: -112.919 Interval 7310 (3654500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3184 7 episodes - episode_reward: -167.442 [-237.748, -121.307] - loss: 12.633 - mae: 85.568 - mean_q: -112.893 Interval 7311 (3655000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3629 10 episodes - episode_reward: -167.917 [-247.215, -119.100] - loss: 12.043 - mae: 85.524 - mean_q: -112.859 Interval 7312 (3655500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0648 8 episodes - episode_reward: -188.967 [-290.857, -126.665] - loss: 11.543 - mae: 85.495 - mean_q: -112.831 Interval 7313 (3656000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1897 9 episodes - episode_reward: -166.804 [-222.963, -126.083] - loss: 14.740 - mae: 85.460 - mean_q: -112.785 Interval 7314 (3656500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7487 10 episodes - episode_reward: -142.112 [-239.326, 66.449] - loss: 10.078 - mae: 85.387 - mean_q: -112.754 Interval 7315 (3657000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8226 8 episodes - episode_reward: -183.423 [-316.106, -121.677] - loss: 10.107 - mae: 85.334 - mean_q: -112.738 Interval 7316 (3657500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6029 7 episodes - episode_reward: -178.444 [-203.712, -140.023] - loss: 12.780 - mae: 85.277 - mean_q: -112.700 Interval 7317 (3658000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -5.5798 6 episodes - episode_reward: -463.198 [-649.590, -264.833] - loss: 8.687 - mae: 85.178 - mean_q: -112.609 Interval 7318 (3658500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6898 5 episodes - episode_reward: -472.772 [-940.194, -171.038] - loss: 13.187 - mae: 85.142 - mean_q: -112.550 Interval 7319 (3659000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8676 6 episodes - episode_reward: -314.518 [-555.220, -131.003] - loss: 12.829 - mae: 85.114 - mean_q: -112.549 Interval 7320 (3659500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4098 7 episodes - episode_reward: -171.127 [-258.979, -63.781] - loss: 10.925 - mae: 85.070 - mean_q: -112.526 Interval 7321 (3660000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0323 9 episodes - episode_reward: -175.825 [-237.212, -100.000] - loss: 8.136 - mae: 85.026 - mean_q: -112.492 Interval 7322 (3660500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7141 7 episodes - episode_reward: -196.755 [-281.937, -142.087] - loss: 11.117 - mae: 85.016 - mean_q: -112.441 Interval 7323 (3661000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4449 11 episodes - episode_reward: -159.255 [-206.595, -100.000] - loss: 11.901 - mae: 84.998 - mean_q: -112.389 Interval 7324 (3661500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1722 6 episodes - episode_reward: -177.372 [-207.563, -144.663] - loss: 11.253 - mae: 84.954 - mean_q: -112.328 Interval 7325 (3662000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8290 8 episodes - episode_reward: -172.190 [-202.954, -119.618] - loss: 8.510 - mae: 84.924 - mean_q: -112.275 Interval 7326 (3662500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2122 9 episodes - episode_reward: -180.247 [-312.941, -110.467] - loss: 9.288 - mae: 84.884 - mean_q: -112.230 Interval 7327 (3663000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8312 6 episodes - episode_reward: -228.092 [-396.999, -117.982] - loss: 12.189 - mae: 84.875 - mean_q: -112.178 Interval 7328 (3663500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4739 8 episodes - episode_reward: -218.749 [-316.331, -169.354] - loss: 10.470 - mae: 84.821 - mean_q: -112.126 Interval 7329 (3664000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9056 7 episodes - episode_reward: -204.028 [-228.517, -188.804] - loss: 12.662 - mae: 84.798 - mean_q: -112.091 Interval 7330 (3664500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5905 8 episodes - episode_reward: -189.345 [-208.175, -164.222] - loss: 9.965 - mae: 84.752 - mean_q: -112.051 Interval 7331 (3665000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9138 8 episodes - episode_reward: -158.922 [-257.333, 44.315] - loss: 9.087 - mae: 84.733 - mean_q: -112.048 Interval 7332 (3665500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7213 8 episodes - episode_reward: -169.188 [-234.363, -113.747] - loss: 13.524 - mae: 84.731 - mean_q: -111.992 Interval 7333 (3666000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7087 7 episodes - episode_reward: -193.162 [-303.641, -130.822] - loss: 10.577 - mae: 84.697 - mean_q: -111.941 Interval 7334 (3666500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6856 7 episodes - episode_reward: -187.673 [-254.694, -149.267] - loss: 10.735 - mae: 84.668 - mean_q: -111.895 Interval 7335 (3667000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.2870 9 episodes - episode_reward: -194.493 [-241.444, -138.999] - loss: 9.027 - mae: 84.635 - mean_q: -111.871 Interval 7336 (3667500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5509 7 episodes - episode_reward: -181.161 [-254.058, -139.964] - loss: 11.038 - mae: 84.625 - mean_q: -111.847 Interval 7337 (3668000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7182 7 episodes - episode_reward: -178.433 [-286.290, -134.599] - loss: 8.227 - mae: 84.601 - mean_q: -111.838 Interval 7338 (3668500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6564 7 episodes - episode_reward: -191.184 [-244.879, -52.652] - loss: 10.666 - mae: 84.603 - mean_q: -111.827 Interval 7339 (3669000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5104 6 episodes - episode_reward: -189.349 [-276.267, -111.004] - loss: 12.605 - mae: 84.573 - mean_q: -111.769 Interval 7340 (3669500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6938 7 episodes - episode_reward: -223.624 [-357.011, -138.453] - loss: 7.364 - mae: 84.535 - mean_q: -111.727 Interval 7341 (3670000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6248 8 episodes - episode_reward: -159.525 [-262.794, -106.349] - loss: 11.624 - mae: 84.549 - mean_q: -111.711 Interval 7342 (3670500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1550 10 episodes - episode_reward: -159.999 [-250.960, -56.651] - loss: 9.258 - mae: 84.507 - mean_q: -111.667 Interval 7343 (3671000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0999 6 episodes - episode_reward: -162.068 [-216.076, -83.216] - loss: 12.683 - mae: 84.511 - mean_q: -111.618 Interval 7344 (3671500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -3.5070 11 episodes - episode_reward: -161.180 [-239.809, -100.000] - loss: 7.910 - mae: 84.451 - mean_q: -111.584 Interval 7345 (3672000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5817 9 episodes - episode_reward: -150.009 [-268.614, -99.477] - loss: 10.365 - mae: 84.417 - mean_q: -111.563 Interval 7346 (3672500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6299 10 episodes - episode_reward: -181.077 [-260.706, -133.305] - loss: 10.183 - mae: 84.395 - mean_q: -111.529 Interval 7347 (3673000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8025 8 episodes - episode_reward: -165.769 [-254.671, -19.873] - loss: 10.294 - mae: 84.364 - mean_q: -111.500 Interval 7348 (3673500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9071 6 episodes - episode_reward: -168.618 [-286.933, -63.801] - loss: 11.384 - mae: 84.346 - mean_q: -111.481 Interval 7349 (3674000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2097 10 episodes - episode_reward: -159.961 [-241.648, -114.397] - loss: 11.899 - mae: 84.321 - mean_q: -111.434 Interval 7350 (3674500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2794 7 episodes - episode_reward: -167.315 [-325.296, 40.593] - loss: 9.256 - mae: 84.283 - mean_q: -111.404 Interval 7351 (3675000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1409 7 episodes - episode_reward: -214.048 [-276.723, -157.984] - loss: 9.811 - mae: 84.258 - mean_q: -111.371 Interval 7352 (3675500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3457 7 episodes - episode_reward: -183.480 [-242.308, -119.681] - loss: 9.216 - mae: 84.221 - mean_q: -111.348 Interval 7353 (3676000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5790 7 episodes - episode_reward: -165.811 [-230.473, -129.809] - loss: 7.557 - mae: 84.194 - mean_q: -111.349 Interval 7354 (3676500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3806 10 episodes - episode_reward: -179.575 [-251.300, -118.727] - loss: 9.324 - mae: 84.177 - mean_q: -111.338 Interval 7355 (3677000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0324 6 episodes - episode_reward: -145.503 [-214.905, -95.532] - loss: 12.192 - mae: 84.163 - mean_q: -111.288 Interval 7356 (3677500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7331 8 episodes - episode_reward: -180.053 [-252.691, 42.440] - loss: 10.667 - mae: 84.140 - mean_q: -111.245 Interval 7357 (3678000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0851 8 episodes - episode_reward: -194.290 [-320.674, -105.114] - loss: 9.243 - mae: 84.120 - mean_q: -111.225 Interval 7358 (3678500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4656 7 episodes - episode_reward: -176.200 [-235.920, -110.435] - loss: 10.262 - mae: 84.098 - mean_q: -111.221 Interval 7359 (3679000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8817 7 episodes - episode_reward: -209.675 [-265.732, -166.388] - loss: 9.144 - mae: 84.080 - mean_q: -111.203 Interval 7360 (3679500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4292 11 episodes - episode_reward: -157.122 [-260.140, -100.000] - loss: 9.216 - mae: 84.080 - mean_q: -111.190 Interval 7361 (3680000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0551 8 episodes - episode_reward: -184.366 [-277.466, -124.947] - loss: 11.639 - mae: 84.061 - mean_q: -111.171 Interval 7362 (3680500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.5266 11 episodes - episode_reward: -163.106 [-215.732, -100.000] - loss: 11.124 - mae: 84.046 - mean_q: -111.140 Interval 7363 (3681000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2948 6 episodes - episode_reward: -277.951 [-379.490, -182.801] - loss: 8.612 - mae: 83.999 - mean_q: -111.133 Interval 7364 (3681500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1120 8 episodes - episode_reward: -193.083 [-279.224, -151.283] - loss: 12.993 - mae: 84.011 - mean_q: -111.118 Interval 7365 (3682000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4434 6 episodes - episode_reward: -204.347 [-298.298, -121.410] - loss: 10.558 - mae: 83.972 - mean_q: -111.097 Interval 7366 (3682500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7597 6 episodes - episode_reward: -158.716 [-200.708, -9.138] - loss: 11.197 - mae: 83.974 - mean_q: -111.090 Interval 7367 (3683000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1897 7 episodes - episode_reward: -140.298 [-285.764, 33.311] - loss: 10.646 - mae: 83.960 - mean_q: -111.072 Interval 7368 (3683500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9098 9 episodes - episode_reward: -167.186 [-209.986, -116.655] - loss: 13.132 - mae: 83.949 - mean_q: -111.031 Interval 7369 (3684000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8165 7 episodes - episode_reward: -202.444 [-365.560, -135.199] - loss: 11.878 - mae: 83.940 - mean_q: -110.993 Interval 7370 (3684500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1602 7 episodes - episode_reward: -149.575 [-239.409, 14.991] - loss: 12.136 - mae: 83.922 - mean_q: -110.970 Interval 7371 (3685000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.0607 7 episodes - episode_reward: -153.431 [-208.630, -29.362] - loss: 9.946 - mae: 83.891 - mean_q: -110.951 Interval 7372 (3685500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0483 8 episodes - episode_reward: -184.471 [-241.960, -144.534] - loss: 10.549 - mae: 83.889 - mean_q: -110.939 Interval 7373 (3686000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4646 8 episodes - episode_reward: -158.374 [-212.721, -129.769] - loss: 8.927 - mae: 83.876 - mean_q: -110.930 Interval 7374 (3686500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1345 8 episodes - episode_reward: -195.959 [-383.791, -118.093] - loss: 8.994 - mae: 83.867 - mean_q: -110.940 Interval 7375 (3687000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2897 8 episodes - episode_reward: -199.071 [-258.105, -114.637] - loss: 14.039 - mae: 83.879 - mean_q: -110.933 Interval 7376 (3687500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0866 8 episodes - episode_reward: -135.363 [-205.395, 37.477] - loss: 11.015 - mae: 83.867 - mean_q: -110.904 Interval 7377 (3688000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0997 6 episodes - episode_reward: -168.475 [-259.798, -66.350] - loss: 12.997 - mae: 83.858 - mean_q: -110.874 Interval 7378 (3688500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2220 10 episodes - episode_reward: -163.083 [-254.597, -88.815] - loss: 10.443 - mae: 83.839 - mean_q: -110.869 Interval 7379 (3689000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9140 8 episodes - episode_reward: -183.202 [-299.424, -72.571] - loss: 12.976 - mae: 83.843 - mean_q: -110.858 Interval 7380 (3689500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1075 7 episodes - episode_reward: -217.524 [-336.661, -100.000] - loss: 12.016 - mae: 83.823 - mean_q: -110.828 Interval 7381 (3690000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9029 8 episodes - episode_reward: -181.663 [-259.115, -117.077] - loss: 13.330 - mae: 83.806 - mean_q: -110.807 Interval 7382 (3690500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1972 6 episodes - episode_reward: -201.362 [-283.346, -70.318] - loss: 13.703 - mae: 83.792 - mean_q: -110.790 Interval 7383 (3691000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4450 7 episodes - episode_reward: -162.234 [-217.172, -4.908] - loss: 11.247 - mae: 83.783 - mean_q: -110.789 Interval 7384 (3691500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8745 8 episodes - episode_reward: -175.183 [-233.626, -118.433] - loss: 9.584 - mae: 83.769 - mean_q: -110.793 Interval 7385 (3692000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6201 9 episodes - episode_reward: -143.590 [-193.496, 32.376] - loss: 10.514 - mae: 83.770 - mean_q: -110.802 Interval 7386 (3692500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7043 8 episodes - episode_reward: -178.696 [-220.670, -119.497] - loss: 9.538 - mae: 83.755 - mean_q: -110.796 Interval 7387 (3693000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.6571 11 episodes - episode_reward: -165.367 [-287.371, -100.000] - loss: 11.067 - mae: 83.739 - mean_q: -110.796 Interval 7388 (3693500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1604 6 episodes - episode_reward: -168.222 [-208.891, -76.203] - loss: 10.057 - mae: 83.703 - mean_q: -110.803 Interval 7389 (3694000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5988 7 episodes - episode_reward: -177.744 [-241.626, -71.892] - loss: 12.488 - mae: 83.722 - mean_q: -110.776 Interval 7390 (3694500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5083 7 episodes - episode_reward: -185.225 [-296.977, -123.738] - loss: 15.403 - mae: 83.714 - mean_q: -110.774 Interval 7391 (3695000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5289 8 episodes - episode_reward: -168.048 [-232.298, -126.655] - loss: 13.502 - mae: 83.707 - mean_q: -110.746 Interval 7392 (3695500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9028 8 episodes - episode_reward: -175.755 [-236.807, -139.949] - loss: 13.297 - mae: 83.723 - mean_q: -110.718 Interval 7393 (3696000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6448 6 episodes - episode_reward: -221.109 [-312.060, -133.750] - loss: 13.205 - mae: 83.719 - mean_q: -110.683 Interval 7394 (3696500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9978 9 episodes - episode_reward: -173.230 [-257.854, -110.319] - loss: 10.426 - mae: 83.678 - mean_q: -110.672 Interval 7395 (3697000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3267 6 episodes - episode_reward: -177.806 [-309.401, 8.725] - loss: 15.826 - mae: 83.683 - mean_q: -110.665 Interval 7396 (3697500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8798 8 episodes - episode_reward: -192.031 [-248.249, -139.043] - loss: 10.497 - mae: 83.653 - mean_q: -110.619 Interval 7397 (3698000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0932 7 episodes - episode_reward: -215.499 [-276.511, -153.315] - loss: 12.332 - mae: 83.650 - mean_q: -110.606 Interval 7398 (3698500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.5111 10 episodes - episode_reward: -176.916 [-232.886, -100.000] - loss: 11.775 - mae: 83.645 - mean_q: -110.610 Interval 7399 (3699000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0312 8 episodes - episode_reward: -194.414 [-336.863, -129.247] - loss: 12.568 - mae: 83.644 - mean_q: -110.593 Interval 7400 (3699500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3296 7 episodes - episode_reward: -163.688 [-191.820, -127.051] - loss: 10.867 - mae: 83.626 - mean_q: -110.602 Interval 7401 (3700000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8222 7 episodes - episode_reward: -200.185 [-272.787, -113.704] - loss: 10.905 - mae: 83.616 - mean_q: -110.623 Interval 7402 (3700500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7313 7 episodes - episode_reward: -192.148 [-217.805, -129.237] - loss: 15.276 - mae: 83.650 - mean_q: -110.605 Interval 7403 (3701000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1281 10 episodes - episode_reward: -159.417 [-228.229, -100.000] - loss: 11.003 - mae: 83.623 - mean_q: -110.580 Interval 7404 (3701500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7544 8 episodes - episode_reward: -167.647 [-249.216, -105.007] - loss: 10.253 - mae: 83.611 - mean_q: -110.591 Interval 7405 (3702000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1174 8 episodes - episode_reward: -195.267 [-240.592, -153.556] - loss: 12.582 - mae: 83.617 - mean_q: -110.599 Interval 7406 (3702500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2332 8 episodes - episode_reward: -203.624 [-279.435, -117.440] - loss: 6.623 - mae: 83.570 - mean_q: -110.627 Interval 7407 (3703000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.4107 7 episodes - episode_reward: -168.940 [-208.864, -129.057] - loss: 14.195 - mae: 83.605 - mean_q: -110.626 Interval 7408 (3703500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2025 10 episodes - episode_reward: -165.759 [-236.891, -100.000] - loss: 9.917 - mae: 83.570 - mean_q: -110.605 Interval 7409 (3704000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8693 6 episodes - episode_reward: -147.966 [-252.158, 40.534] - loss: 11.093 - mae: 83.583 - mean_q: -110.604 Interval 7410 (3704500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0316 8 episodes - episode_reward: -190.926 [-326.755, -149.459] - loss: 12.959 - mae: 83.597 - mean_q: -110.607 Interval 7411 (3705000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6949 8 episodes - episode_reward: -166.130 [-275.142, 0.477] - loss: 10.332 - mae: 83.584 - mean_q: -110.608 Interval 7412 (3705500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6289 7 episodes - episode_reward: -194.147 [-318.906, 11.535] - loss: 11.445 - mae: 83.596 - mean_q: -110.615 Interval 7413 (3706000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5194 7 episodes - episode_reward: -169.368 [-222.553, -84.108] - loss: 9.782 - mae: 83.610 - mean_q: -110.632 Interval 7414 (3706500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0341 7 episodes - episode_reward: -220.908 [-352.376, -131.263] - loss: 15.801 - mae: 83.636 - mean_q: -110.607 Interval 7415 (3707000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3966 6 episodes - episode_reward: -197.794 [-280.899, -158.046] - loss: 11.601 - mae: 83.623 - mean_q: -110.633 Interval 7416 (3707500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -3.9204 10 episodes - episode_reward: -203.366 [-322.420, -120.241] - loss: 11.977 - mae: 83.634 - mean_q: -110.646 Interval 7417 (3708000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5209 7 episodes - episode_reward: -177.622 [-253.508, -68.658] - loss: 15.779 - mae: 83.618 - mean_q: -110.638 Interval 7418 (3708500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4753 8 episodes - episode_reward: -151.067 [-232.701, -38.897] - loss: 11.731 - mae: 83.563 - mean_q: -110.647 Interval 7419 (3709000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1996 6 episodes - episode_reward: -171.562 [-253.426, -43.911] - loss: 12.232 - mae: 83.527 - mean_q: -110.661 Interval 7420 (3709500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9368 8 episodes - episode_reward: -186.013 [-294.646, -100.000] - loss: 10.127 - mae: 83.475 - mean_q: -110.663 Interval 7421 (3710000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9359 9 episodes - episode_reward: -171.966 [-235.622, -120.784] - loss: 13.266 - mae: 83.443 - mean_q: -110.655 Interval 7422 (3710500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.1733 6 episodes - episode_reward: -429.655 [-909.647, -103.264] - loss: 12.986 - mae: 83.422 - mean_q: -110.651 Interval 7423 (3711000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0181 6 episodes - episode_reward: -414.005 [-731.564, -85.876] - loss: 13.746 - mae: 83.434 - mean_q: -110.652 Interval 7424 (3711500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4969 5 episodes - episode_reward: -220.966 [-321.594, -144.562] - loss: 13.242 - mae: 83.459 - mean_q: -110.677 Interval 7425 (3712000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2250 8 episodes - episode_reward: -219.734 [-451.040, -112.448] - loss: 14.702 - mae: 83.491 - mean_q: -110.680 Interval 7426 (3712500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0911 7 episodes - episode_reward: -219.331 [-359.164, -130.705] - loss: 12.760 - mae: 83.494 - mean_q: -110.682 Interval 7427 (3713000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3992 7 episodes - episode_reward: -166.771 [-213.699, -49.085] - loss: 16.480 - mae: 83.513 - mean_q: -110.672 Interval 7428 (3713500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8075 11 episodes - episode_reward: -178.890 [-240.185, -100.000] - loss: 14.165 - mae: 83.526 - mean_q: -110.656 Interval 7429 (3714000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8464 7 episodes - episode_reward: -194.691 [-305.709, -112.060] - loss: 11.501 - mae: 83.518 - mean_q: -110.645 Interval 7430 (3714500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5882 10 episodes - episode_reward: -183.237 [-238.171, -100.000] - loss: 12.778 - mae: 83.545 - mean_q: -110.650 Interval 7431 (3715000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8138 9 episodes - episode_reward: -158.702 [-220.879, -100.000] - loss: 13.342 - mae: 83.559 - mean_q: -110.657 Interval 7432 (3715500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5073 7 episodes - episode_reward: -173.823 [-327.286, -121.523] - loss: 12.644 - mae: 83.565 - mean_q: -110.656 Interval 7433 (3716000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9392 8 episodes - episode_reward: -188.370 [-212.561, -129.615] - loss: 12.544 - mae: 83.570 - mean_q: -110.639 Interval 7434 (3716500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.9828 8 episodes - episode_reward: -192.331 [-242.470, -149.767] - loss: 14.733 - mae: 83.586 - mean_q: -110.640 Interval 7435 (3717000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7116 6 episodes - episode_reward: -216.822 [-337.463, -171.056] - loss: 14.734 - mae: 83.587 - mean_q: -110.631 Interval 7436 (3717500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9947 7 episodes - episode_reward: -207.640 [-308.821, -152.212] - loss: 16.156 - mae: 83.587 - mean_q: -110.607 Interval 7437 (3718000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3844 10 episodes - episode_reward: -175.297 [-307.138, -118.943] - loss: 12.345 - mae: 83.575 - mean_q: -110.618 Interval 7438 (3718500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5741 6 episodes - episode_reward: -196.474 [-232.393, -142.167] - loss: 11.436 - mae: 83.583 - mean_q: -110.629 Interval 7439 (3719000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5151 10 episodes - episode_reward: -178.218 [-244.984, -100.000] - loss: 13.161 - mae: 83.576 - mean_q: -110.622 Interval 7440 (3719500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7252 9 episodes - episode_reward: -213.443 [-267.956, -153.051] - loss: 11.673 - mae: 83.571 - mean_q: -110.624 Interval 7441 (3720000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9947 9 episodes - episode_reward: -113.585 [-211.819, 66.064] - loss: 10.804 - mae: 83.565 - mean_q: -110.660 Interval 7442 (3720500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1516 9 episodes - episode_reward: -172.092 [-209.205, -114.734] - loss: 12.024 - mae: 83.580 - mean_q: -110.686 Interval 7443 (3721000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2729 7 episodes - episode_reward: -166.794 [-223.066, -124.835] - loss: 12.489 - mae: 83.585 - mean_q: -110.703 Interval 7444 (3721500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4711 7 episodes - episode_reward: -171.248 [-303.653, -105.704] - loss: 13.370 - mae: 83.588 - mean_q: -110.694 Interval 7445 (3722000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3147 8 episodes - episode_reward: -145.285 [-240.845, -37.840] - loss: 12.105 - mae: 83.586 - mean_q: -110.716 Interval 7446 (3722500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5010 7 episodes - episode_reward: -174.854 [-243.716, -142.331] - loss: 11.613 - mae: 83.581 - mean_q: -110.706 Interval 7447 (3723000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9698 8 episodes - episode_reward: -188.562 [-253.642, -129.506] - loss: 9.756 - mae: 83.569 - mean_q: -110.725 Interval 7448 (3723500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5192 8 episodes - episode_reward: -219.148 [-344.029, -115.805] - loss: 10.094 - mae: 83.583 - mean_q: -110.769 Interval 7449 (3724000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1484 8 episodes - episode_reward: -198.789 [-277.299, -100.000] - loss: 13.430 - mae: 83.633 - mean_q: -110.763 Interval 7450 (3724500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0792 8 episodes - episode_reward: -186.140 [-259.462, -100.000] - loss: 14.650 - mae: 83.634 - mean_q: -110.726 Interval 7451 (3725000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3787 10 episodes - episode_reward: -171.452 [-297.361, -80.451] - loss: 17.387 - mae: 83.664 - mean_q: -110.716 Interval 7452 (3725500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0578 8 episodes - episode_reward: -197.745 [-290.838, -133.592] - loss: 13.768 - mae: 83.632 - mean_q: -110.702 Interval 7453 (3726000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1194 7 episodes - episode_reward: -221.034 [-290.436, -162.671] - loss: 14.198 - mae: 83.649 - mean_q: -110.710 Interval 7454 (3726500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0425 9 episodes - episode_reward: -169.855 [-258.247, -31.292] - loss: 8.876 - mae: 83.627 - mean_q: -110.730 Interval 7455 (3727000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8714 8 episodes - episode_reward: -174.338 [-244.499, -100.050] - loss: 12.692 - mae: 83.650 - mean_q: -110.765 Interval 7456 (3727500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6847 7 episodes - episode_reward: -193.557 [-278.308, -147.868] - loss: 13.119 - mae: 83.651 - mean_q: -110.785 Interval 7457 (3728000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3540 9 episodes - episode_reward: -187.788 [-312.391, -100.000] - loss: 11.110 - mae: 83.659 - mean_q: -110.802 Interval 7458 (3728500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5277 9 episodes - episode_reward: -132.984 [-203.164, 14.785] - loss: 8.404 - mae: 83.670 - mean_q: -110.846 Interval 7459 (3729000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9456 9 episodes - episode_reward: -172.811 [-241.431, -110.498] - loss: 12.726 - mae: 83.712 - mean_q: -110.856 Interval 7460 (3729500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4139 7 episodes - episode_reward: -165.121 [-218.310, -119.400] - loss: 10.816 - mae: 83.720 - mean_q: -110.863 Interval 7461 (3730000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9808 10 episodes - episode_reward: -151.048 [-212.424, -66.465] - loss: 11.270 - mae: 83.714 - mean_q: -110.890 Interval 7462 (3730500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5923 8 episodes - episode_reward: -225.054 [-271.341, -123.360] - loss: 12.646 - mae: 83.731 - mean_q: -110.905 Interval 7463 (3731000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1360 7 episodes - episode_reward: -156.639 [-258.397, -24.452] - loss: 13.687 - mae: 83.757 - mean_q: -110.904 Interval 7464 (3731500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3716 7 episodes - episode_reward: -164.824 [-215.679, -115.947] - loss: 12.073 - mae: 83.753 - mean_q: -110.899 Interval 7465 (3732000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8442 6 episodes - episode_reward: -139.405 [-211.709, -74.386] - loss: 14.031 - mae: 83.771 - mean_q: -110.898 Interval 7466 (3732500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7523 7 episodes - episode_reward: -209.432 [-340.992, -126.623] - loss: 12.398 - mae: 83.762 - mean_q: -110.889 Interval 7467 (3733000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1312 9 episodes - episode_reward: -178.420 [-257.290, -100.000] - loss: 13.231 - mae: 83.762 - mean_q: -110.879 Interval 7468 (3733500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4587 7 episodes - episode_reward: -168.883 [-217.297, -104.988] - loss: 12.679 - mae: 83.759 - mean_q: -110.902 Interval 7469 (3734000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3141 10 episodes - episode_reward: -169.246 [-213.946, -100.000] - loss: 11.064 - mae: 83.759 - mean_q: -110.917 Interval 7470 (3734500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.2630 9 episodes - episode_reward: -181.420 [-263.998, -102.156] - loss: 16.702 - mae: 83.794 - mean_q: -110.905 Interval 7471 (3735000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6451 8 episodes - episode_reward: -164.006 [-224.472, -107.995] - loss: 16.191 - mae: 83.790 - mean_q: -110.887 Interval 7472 (3735500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2388 6 episodes - episode_reward: -182.318 [-242.400, -129.800] - loss: 12.720 - mae: 83.776 - mean_q: -110.875 Interval 7473 (3736000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8287 8 episodes - episode_reward: -176.509 [-279.034, -119.504] - loss: 11.614 - mae: 83.779 - mean_q: -110.868 Interval 7474 (3736500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5249 8 episodes - episode_reward: -159.279 [-288.967, -61.548] - loss: 11.772 - mae: 83.767 - mean_q: -110.884 Interval 7475 (3737000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2326 8 episodes - episode_reward: -191.236 [-236.374, -152.950] - loss: 12.010 - mae: 83.756 - mean_q: -110.894 Interval 7476 (3737500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3441 9 episodes - episode_reward: -193.824 [-247.207, -132.043] - loss: 13.005 - mae: 83.762 - mean_q: -110.882 Interval 7477 (3738000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4366 8 episodes - episode_reward: -206.146 [-274.547, -153.586] - loss: 9.362 - mae: 83.745 - mean_q: -110.876 Interval 7478 (3738500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0465 7 episodes - episode_reward: -164.987 [-258.119, 6.195] - loss: 9.720 - mae: 83.759 - mean_q: -110.895 Interval 7479 (3739000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9167 7 episodes - episode_reward: -205.391 [-282.946, -161.079] - loss: 10.052 - mae: 83.749 - mean_q: -110.909 Interval 7480 (3739500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0001 7 episodes - episode_reward: -211.095 [-403.579, -126.774] - loss: 12.188 - mae: 83.763 - mean_q: -110.909 Interval 7481 (3740000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7649 9 episodes - episode_reward: -149.369 [-241.777, 23.467] - loss: 8.948 - mae: 83.754 - mean_q: -110.935 Interval 7482 (3740500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1366 8 episodes - episode_reward: -190.202 [-268.382, -140.447] - loss: 11.723 - mae: 83.766 - mean_q: -110.939 Interval 7483 (3741000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7480 9 episodes - episode_reward: -162.941 [-225.593, -100.000] - loss: 9.207 - mae: 83.782 - mean_q: -110.981 Interval 7484 (3741500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5978 7 episodes - episode_reward: -328.285 [-520.250, -150.554] - loss: 11.881 - mae: 83.810 - mean_q: -111.019 Interval 7485 (3742000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0134 8 episodes - episode_reward: -181.419 [-284.163, -109.127] - loss: 13.958 - mae: 83.846 - mean_q: -111.017 Interval 7486 (3742500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7664 8 episodes - episode_reward: -179.637 [-361.863, -103.948] - loss: 9.673 - mae: 83.868 - mean_q: -111.000 Interval 7487 (3743000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7351 7 episodes - episode_reward: -193.714 [-244.478, -134.901] - loss: 11.729 - mae: 83.896 - mean_q: -111.024 Interval 7488 (3743500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0229 8 episodes - episode_reward: -179.107 [-250.345, -124.903] - loss: 12.759 - mae: 83.903 - mean_q: -111.026 Interval 7489 (3744000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9444 7 episodes - episode_reward: -168.226 [-246.870, -35.737] - loss: 10.332 - mae: 83.900 - mean_q: -111.040 Interval 7490 (3744500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7729 8 episodes - episode_reward: -153.660 [-260.990, 20.422] - loss: 12.519 - mae: 83.929 - mean_q: -111.044 Interval 7491 (3745000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2153 9 episodes - episode_reward: -183.855 [-328.222, -75.076] - loss: 11.415 - mae: 83.944 - mean_q: -111.059 Interval 7492 (3745500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6609 7 episodes - episode_reward: -181.766 [-212.082, -151.124] - loss: 12.308 - mae: 83.963 - mean_q: -111.061 Interval 7493 (3746000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0243 7 episodes - episode_reward: -211.335 [-260.832, -153.783] - loss: 11.211 - mae: 83.975 - mean_q: -111.065 Interval 7494 (3746500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4225 10 episodes - episode_reward: -169.169 [-259.943, -63.511] - loss: 13.638 - mae: 84.011 - mean_q: -111.080 Interval 7495 (3747000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0898 7 episodes - episode_reward: -161.568 [-250.137, -25.906] - loss: 10.720 - mae: 84.002 - mean_q: -111.073 Interval 7496 (3747500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8518 8 episodes - episode_reward: -177.644 [-295.435, -114.305] - loss: 13.120 - mae: 83.992 - mean_q: -111.086 Interval 7497 (3748000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6444 7 episodes - episode_reward: -196.066 [-263.877, -94.861] - loss: 9.293 - mae: 83.981 - mean_q: -111.111 Interval 7498 (3748500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5703 7 episodes - episode_reward: -178.502 [-233.119, -137.718] - loss: 14.123 - mae: 84.021 - mean_q: -111.088 Interval 7499 (3749000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7553 5 episodes - episode_reward: -165.677 [-267.874, 22.122] - loss: 12.072 - mae: 84.029 - mean_q: -111.070 Interval 7500 (3749500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2142 8 episodes - episode_reward: -207.266 [-347.779, -122.594] - loss: 11.425 - mae: 84.027 - mean_q: -111.075 Interval 7501 (3750000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8086 6 episodes - episode_reward: -219.801 [-243.197, -158.547] - loss: 13.470 - mae: 84.035 - mean_q: -111.083 Interval 7502 (3750500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6010 8 episodes - episode_reward: -169.629 [-261.813, -71.933] - loss: 11.118 - mae: 84.041 - mean_q: -111.101 Interval 7503 (3751000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8369 7 episodes - episode_reward: -197.656 [-296.713, -110.865] - loss: 15.481 - mae: 84.081 - mean_q: -111.081 Interval 7504 (3751500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1271 8 episodes - episode_reward: -203.988 [-283.277, -153.191] - loss: 12.486 - mae: 84.069 - mean_q: -111.070 Interval 7505 (3752000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6851 9 episodes - episode_reward: -147.080 [-238.569, 21.949] - loss: 13.758 - mae: 84.084 - mean_q: -111.075 Interval 7506 (3752500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7633 8 episodes - episode_reward: -170.383 [-197.241, -130.185] - loss: 13.665 - mae: 84.094 - mean_q: -111.067 Interval 7507 (3753000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1488 7 episodes - episode_reward: -156.414 [-226.723, 2.487] - loss: 9.221 - mae: 84.088 - mean_q: -111.090 Interval 7508 (3753500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9157 8 episodes - episode_reward: -180.360 [-258.217, -100.000] - loss: 9.619 - mae: 84.085 - mean_q: -111.134 Interval 7509 (3754000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7826 7 episodes - episode_reward: -189.772 [-333.884, -85.364] - loss: 12.464 - mae: 84.100 - mean_q: -111.191 Interval 7510 (3754500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4117 7 episodes - episode_reward: -178.361 [-293.666, 38.364] - loss: 12.563 - mae: 84.111 - mean_q: -111.221 Interval 7511 (3755000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2465 8 episodes - episode_reward: -205.173 [-274.386, -157.310] - loss: 10.640 - mae: 84.113 - mean_q: -111.228 Interval 7512 (3755500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9638 8 episodes - episode_reward: -185.337 [-243.437, -100.000] - loss: 12.690 - mae: 84.119 - mean_q: -111.245 Interval 7513 (3756000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5540 8 episodes - episode_reward: -159.050 [-230.398, -70.097] - loss: 13.086 - mae: 84.119 - mean_q: -111.260 Interval 7514 (3756500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4589 6 episodes - episode_reward: -209.258 [-286.178, -147.258] - loss: 12.112 - mae: 84.101 - mean_q: -111.249 Interval 7515 (3757000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7228 8 episodes - episode_reward: -173.510 [-209.737, -138.509] - loss: 9.602 - mae: 84.102 - mean_q: -111.258 Interval 7516 (3757500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1416 7 episodes - episode_reward: -214.732 [-371.686, -162.595] - loss: 10.733 - mae: 84.119 - mean_q: -111.281 Interval 7517 (3758000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7310 9 episodes - episode_reward: -208.250 [-348.036, -120.648] - loss: 10.425 - mae: 84.143 - mean_q: -111.312 Interval 7518 (3758500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8792 7 episodes - episode_reward: -133.171 [-210.188, 3.929] - loss: 11.421 - mae: 84.140 - mean_q: -111.312 Interval 7519 (3759000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5094 10 episodes - episode_reward: -180.032 [-263.451, -113.391] - loss: 16.239 - mae: 84.176 - mean_q: -111.310 Interval 7520 (3759500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5988 6 episodes - episode_reward: -211.105 [-328.498, -149.676] - loss: 10.305 - mae: 84.141 - mean_q: -111.291 Interval 7521 (3760000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0751 8 episodes - episode_reward: -198.649 [-261.291, -130.944] - loss: 11.733 - mae: 84.151 - mean_q: -111.321 Interval 7522 (3760500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9739 7 episodes - episode_reward: -211.422 [-398.370, -115.613] - loss: 11.652 - mae: 84.146 - mean_q: -111.341 Interval 7523 (3761000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6615 7 episodes - episode_reward: -182.067 [-243.948, -145.171] - loss: 12.578 - mae: 84.108 - mean_q: -111.338 Interval 7524 (3761500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1452 9 episodes - episode_reward: -180.500 [-248.445, -117.801] - loss: 13.264 - mae: 84.088 - mean_q: -111.341 Interval 7525 (3762000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6845 9 episodes - episode_reward: -143.505 [-181.433, -100.000] - loss: 11.499 - mae: 84.056 - mean_q: -111.365 Interval 7526 (3762500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6917 7 episodes - episode_reward: -185.942 [-244.683, -157.732] - loss: 9.040 - mae: 84.042 - mean_q: -111.367 Interval 7527 (3763000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5844 5 episodes - episode_reward: -268.911 [-456.796, -181.538] - loss: 8.974 - mae: 84.026 - mean_q: -111.359 Interval 7528 (3763500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.6849 6 episodes - episode_reward: -567.425 [-1015.654, -339.094] - loss: 12.023 - mae: 84.039 - mean_q: -111.324 Interval 7529 (3764000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5983 6 episodes - episode_reward: -287.729 [-828.539, -113.243] - loss: 8.983 - mae: 84.040 - mean_q: -111.377 Interval 7530 (3764500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7908 8 episodes - episode_reward: -178.784 [-239.724, -109.342] - loss: 7.098 - mae: 84.065 - mean_q: -111.396 Interval 7531 (3765000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7851 7 episodes - episode_reward: -197.507 [-260.075, -119.016] - loss: 11.613 - mae: 84.105 - mean_q: -111.449 Interval 7532 (3765500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2956 10 episodes - episode_reward: -163.597 [-260.659, -100.000] - loss: 9.375 - mae: 84.140 - mean_q: -111.485 Interval 7533 (3766000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0899 9 episodes - episode_reward: -178.707 [-232.164, -105.025] - loss: 10.725 - mae: 84.176 - mean_q: -111.491 Interval 7534 (3766500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7407 7 episodes - episode_reward: -192.887 [-240.607, -141.541] - loss: 9.024 - mae: 84.200 - mean_q: -111.509 Interval 7535 (3767000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8160 9 episodes - episode_reward: -151.954 [-240.354, 60.380] - loss: 10.373 - mae: 84.211 - mean_q: -111.532 Interval 7536 (3767500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4406 9 episodes - episode_reward: -190.572 [-276.335, -100.000] - loss: 13.289 - mae: 84.243 - mean_q: -111.550 Interval 7537 (3768000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2947 8 episodes - episode_reward: -149.531 [-195.418, 12.686] - loss: 11.680 - mae: 84.268 - mean_q: -111.530 Interval 7538 (3768500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9875 6 episodes - episode_reward: -168.229 [-283.052, -44.822] - loss: 8.758 - mae: 84.263 - mean_q: -111.521 Interval 7539 (3769000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2168 9 episodes - episode_reward: -173.511 [-264.168, -100.000] - loss: 11.127 - mae: 84.306 - mean_q: -111.540 Interval 7540 (3769500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3876 6 episodes - episode_reward: -192.729 [-244.463, -86.696] - loss: 12.819 - mae: 84.327 - mean_q: -111.519 Interval 7541 (3770000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4345 7 episodes - episode_reward: -178.437 [-258.596, -125.114] - loss: 12.621 - mae: 84.334 - mean_q: -111.508 Interval 7542 (3770500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.8860 11 episodes - episode_reward: -177.078 [-271.247, -100.000] - loss: 13.131 - mae: 84.344 - mean_q: -111.475 Interval 7543 (3771000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7893 8 episodes - episode_reward: -169.316 [-200.441, -119.250] - loss: 12.105 - mae: 84.342 - mean_q: -111.478 Interval 7544 (3771500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9502 8 episodes - episode_reward: -193.748 [-274.762, -100.000] - loss: 11.136 - mae: 84.332 - mean_q: -111.480 Interval 7545 (3772000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3259 9 episodes - episode_reward: -185.775 [-264.497, -129.120] - loss: 11.540 - mae: 84.338 - mean_q: -111.494 Interval 7546 (3772500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8070 6 episodes - episode_reward: -232.361 [-294.374, -163.488] - loss: 8.917 - mae: 84.324 - mean_q: -111.523 Interval 7547 (3773000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5889 10 episodes - episode_reward: -176.763 [-274.657, -100.000] - loss: 10.598 - mae: 84.337 - mean_q: -111.546 Interval 7548 (3773500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3482 7 episodes - episode_reward: -171.476 [-251.334, -95.819] - loss: 9.453 - mae: 84.333 - mean_q: -111.554 Interval 7549 (3774000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8341 6 episodes - episode_reward: -222.178 [-307.731, -133.946] - loss: 9.902 - mae: 84.337 - mean_q: -111.578 Interval 7550 (3774500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5799 7 episodes - episode_reward: -191.379 [-292.970, -136.293] - loss: 14.768 - mae: 84.360 - mean_q: -111.572 Interval 7551 (3775000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9371 8 episodes - episode_reward: -184.268 [-238.327, -112.238] - loss: 9.285 - mae: 84.352 - mean_q: -111.551 Interval 7552 (3775500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8397 9 episodes - episode_reward: -163.244 [-225.289, -100.000] - loss: 14.624 - mae: 84.395 - mean_q: -111.517 Interval 7553 (3776000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8100 8 episodes - episode_reward: -109.739 [-191.688, 21.610] - loss: 12.338 - mae: 84.376 - mean_q: -111.497 Interval 7554 (3776500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7655 7 episodes - episode_reward: -186.932 [-249.442, -130.769] - loss: 9.551 - mae: 84.369 - mean_q: -111.506 Interval 7555 (3777000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3778 6 episodes - episode_reward: -202.715 [-261.335, -160.427] - loss: 11.636 - mae: 84.371 - mean_q: -111.506 Interval 7556 (3777500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2412 9 episodes - episode_reward: -183.191 [-232.107, -142.055] - loss: 9.466 - mae: 84.375 - mean_q: -111.506 Interval 7557 (3778000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0609 9 episodes - episode_reward: -169.412 [-242.811, 0.648] - loss: 9.458 - mae: 84.378 - mean_q: -111.499 Interval 7558 (3778500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1201 9 episodes - episode_reward: -174.387 [-204.146, -103.460] - loss: 10.867 - mae: 84.372 - mean_q: -111.503 Interval 7559 (3779000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2919 7 episodes - episode_reward: -162.296 [-261.234, 19.545] - loss: 11.520 - mae: 84.377 - mean_q: -111.504 Interval 7560 (3779500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8767 8 episodes - episode_reward: -183.132 [-230.539, -139.133] - loss: 11.165 - mae: 84.371 - mean_q: -111.495 Interval 7561 (3780000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5468 8 episodes - episode_reward: -158.007 [-218.654, 5.066] - loss: 10.582 - mae: 84.378 - mean_q: -111.481 Interval 7562 (3780500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9574 8 episodes - episode_reward: -190.268 [-254.968, -148.880] - loss: 9.255 - mae: 84.377 - mean_q: -111.485 Interval 7563 (3781000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9420 7 episodes - episode_reward: -199.692 [-339.778, -130.632] - loss: 10.396 - mae: 84.381 - mean_q: -111.489 Interval 7564 (3781500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7434 9 episodes - episode_reward: -157.360 [-190.949, -114.400] - loss: 8.920 - mae: 84.379 - mean_q: -111.490 Interval 7565 (3782000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0026 7 episodes - episode_reward: -214.792 [-378.734, -14.363] - loss: 10.845 - mae: 84.377 - mean_q: -111.495 Interval 7566 (3782500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0888 7 episodes - episode_reward: -201.638 [-336.429, -100.000] - loss: 11.744 - mae: 84.373 - mean_q: -111.490 Interval 7567 (3783000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6228 8 episodes - episode_reward: -173.769 [-249.023, -75.585] - loss: 8.242 - mae: 84.359 - mean_q: -111.490 Interval 7568 (3783500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1002 7 episodes - episode_reward: -158.517 [-231.856, -16.154] - loss: 9.412 - mae: 84.364 - mean_q: -111.509 Interval 7569 (3784000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8567 8 episodes - episode_reward: -171.468 [-345.039, -45.302] - loss: 11.178 - mae: 84.364 - mean_q: -111.511 Interval 7570 (3784500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3550 8 episodes - episode_reward: -213.172 [-319.486, -136.670] - loss: 11.308 - mae: 84.378 - mean_q: -111.508 Interval 7571 (3785000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1551 7 episodes - episode_reward: -157.569 [-189.078, -113.540] - loss: 13.500 - mae: 84.365 - mean_q: -111.498 Interval 7572 (3785500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5565 7 episodes - episode_reward: -179.425 [-254.864, -124.465] - loss: 11.186 - mae: 84.359 - mean_q: -111.496 Interval 7573 (3786000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6091 10 episodes - episode_reward: -177.133 [-341.988, -100.000] - loss: 12.678 - mae: 84.359 - mean_q: -111.496 Interval 7574 (3786500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7427 7 episodes - episode_reward: -195.622 [-339.499, -93.785] - loss: 11.683 - mae: 84.373 - mean_q: -111.501 Interval 7575 (3787000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6897 7 episodes - episode_reward: -183.019 [-213.924, -152.858] - loss: 11.503 - mae: 84.370 - mean_q: -111.469 Interval 7576 (3787500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0292 10 episodes - episode_reward: -211.146 [-411.294, -100.000] - loss: 11.465 - mae: 84.373 - mean_q: -111.487 Interval 7577 (3788000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9789 8 episodes - episode_reward: -185.665 [-332.887, -51.238] - loss: 11.880 - mae: 84.366 - mean_q: -111.482 Interval 7578 (3788500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9421 7 episodes - episode_reward: -216.331 [-319.660, -100.000] - loss: 12.411 - mae: 84.353 - mean_q: -111.491 Interval 7579 (3789000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5405 7 episodes - episode_reward: -167.136 [-226.834, -24.494] - loss: 13.097 - mae: 84.330 - mean_q: -111.480 Interval 7580 (3789500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4162 7 episodes - episode_reward: -171.755 [-267.748, -98.546] - loss: 10.600 - mae: 84.328 - mean_q: -111.489 Interval 7581 (3790000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0886 9 episodes - episode_reward: -175.381 [-317.741, -100.000] - loss: 14.085 - mae: 84.341 - mean_q: -111.491 Interval 7582 (3790500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5777 8 episodes - episode_reward: -165.235 [-211.195, -100.000] - loss: 8.363 - mae: 84.302 - mean_q: -111.511 Interval 7583 (3791000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9292 8 episodes - episode_reward: -176.250 [-257.311, -105.585] - loss: 7.560 - mae: 84.305 - mean_q: -111.575 Interval 7584 (3791500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8617 6 episodes - episode_reward: -151.324 [-213.975, -29.321] - loss: 11.616 - mae: 84.332 - mean_q: -111.589 Interval 7585 (3792000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8677 8 episodes - episode_reward: -185.243 [-234.880, -127.204] - loss: 9.144 - mae: 84.320 - mean_q: -111.603 Interval 7586 (3792500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7276 7 episodes - episode_reward: -199.489 [-349.864, -118.756] - loss: 12.501 - mae: 84.333 - mean_q: -111.600 Interval 7587 (3793000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -1.3380 7 episodes - episode_reward: -95.121 [-190.282, 34.716] - loss: 12.247 - mae: 84.341 - mean_q: -111.574 Interval 7588 (3793500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7821 7 episodes - episode_reward: -202.005 [-278.617, -124.421] - loss: 12.531 - mae: 84.336 - mean_q: -111.533 Interval 7589 (3794000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7161 7 episodes - episode_reward: -181.007 [-255.464, -100.000] - loss: 12.557 - mae: 84.323 - mean_q: -111.516 Interval 7590 (3794500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4224 9 episodes - episode_reward: -191.917 [-281.587, -106.914] - loss: 8.868 - mae: 84.309 - mean_q: -111.503 Interval 7591 (3795000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6760 8 episodes - episode_reward: -175.955 [-230.086, -111.888] - loss: 6.789 - mae: 84.282 - mean_q: -111.515 Interval 7592 (3795500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5760 6 episodes - episode_reward: -204.717 [-355.052, -100.945] - loss: 13.151 - mae: 84.298 - mean_q: -111.507 Interval 7593 (3796000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4829 7 episodes - episode_reward: -175.124 [-393.997, 28.006] - loss: 9.535 - mae: 84.272 - mean_q: -111.479 Interval 7594 (3796500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4976 8 episodes - episode_reward: -162.222 [-221.155, -59.476] - loss: 11.486 - mae: 84.273 - mean_q: -111.466 Interval 7595 (3797000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6437 7 episodes - episode_reward: -181.508 [-278.981, -123.683] - loss: 7.494 - mae: 84.247 - mean_q: -111.479 Interval 7596 (3797500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7152 8 episodes - episode_reward: -169.484 [-223.881, -64.220] - loss: 15.181 - mae: 84.268 - mean_q: -111.486 Interval 7597 (3798000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7847 7 episodes - episode_reward: -200.982 [-294.835, -140.287] - loss: 12.011 - mae: 84.238 - mean_q: -111.453 Interval 7598 (3798500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2070 8 episodes - episode_reward: -141.841 [-249.914, 29.126] - loss: 11.010 - mae: 84.237 - mean_q: -111.441 Interval 7599 (3799000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7717 8 episodes - episode_reward: -171.365 [-253.298, -117.509] - loss: 11.180 - mae: 84.244 - mean_q: -111.447 Interval 7600 (3799500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9101 8 episodes - episode_reward: -186.517 [-245.194, -126.209] - loss: 12.517 - mae: 84.242 - mean_q: -111.427 Interval 7601 (3800000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8264 9 episodes - episode_reward: -158.412 [-293.520, -72.582] - loss: 15.547 - mae: 84.235 - mean_q: -111.408 Interval 7602 (3800500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9417 8 episodes - episode_reward: -169.230 [-250.186, -131.438] - loss: 14.596 - mae: 84.212 - mean_q: -111.403 Interval 7603 (3801000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9718 6 episodes - episode_reward: -183.279 [-222.051, -160.277] - loss: 8.606 - mae: 84.183 - mean_q: -111.411 Interval 7604 (3801500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3530 7 episodes - episode_reward: -168.493 [-214.789, -141.732] - loss: 10.859 - mae: 84.189 - mean_q: -111.422 Interval 7605 (3802000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4994 7 episodes - episode_reward: -171.225 [-247.064, -117.613] - loss: 11.803 - mae: 84.187 - mean_q: -111.400 Interval 7606 (3802500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1971 10 episodes - episode_reward: -163.381 [-212.592, -132.928] - loss: 8.394 - mae: 84.149 - mean_q: -111.403 Interval 7607 (3803000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6867 6 episodes - episode_reward: -211.728 [-259.155, -157.415] - loss: 11.688 - mae: 84.156 - mean_q: -111.425 Interval 7608 (3803500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9225 8 episodes - episode_reward: -194.720 [-267.798, -110.207] - loss: 10.892 - mae: 84.132 - mean_q: -111.434 Interval 7609 (3804000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4876 7 episodes - episode_reward: -172.885 [-326.044, -16.805] - loss: 10.001 - mae: 84.120 - mean_q: -111.445 Interval 7610 (3804500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0918 7 episodes - episode_reward: -212.393 [-296.050, -100.000] - loss: 10.440 - mae: 84.124 - mean_q: -111.436 Interval 7611 (3805000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4125 10 episodes - episode_reward: -176.017 [-236.047, -112.987] - loss: 10.433 - mae: 84.136 - mean_q: -111.432 Interval 7612 (3805500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8715 7 episodes - episode_reward: -195.437 [-253.130, -163.206] - loss: 15.062 - mae: 84.158 - mean_q: -111.393 Interval 7613 (3806000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6423 7 episodes - episode_reward: -191.403 [-255.859, -31.579] - loss: 9.573 - mae: 84.123 - mean_q: -111.371 Interval 7614 (3806500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.3280 9 episodes - episode_reward: -192.974 [-335.679, -100.000] - loss: 11.241 - mae: 84.119 - mean_q: -111.388 Interval 7615 (3807000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6548 7 episodes - episode_reward: -182.800 [-297.429, -118.429] - loss: 11.147 - mae: 84.114 - mean_q: -111.373 Interval 7616 (3807500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9409 8 episodes - episode_reward: -173.445 [-240.294, -138.247] - loss: 10.899 - mae: 84.129 - mean_q: -111.360 Interval 7617 (3808000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0291 8 episodes - episode_reward: -204.807 [-293.997, -135.132] - loss: 14.232 - mae: 84.134 - mean_q: -111.347 Interval 7618 (3808500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3613 7 episodes - episode_reward: -165.899 [-262.507, -100.000] - loss: 10.737 - mae: 84.103 - mean_q: -111.313 Interval 7619 (3809000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0176 7 episodes - episode_reward: -142.376 [-216.223, -19.757] - loss: 11.872 - mae: 84.114 - mean_q: -111.299 Interval 7620 (3809500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8330 7 episodes - episode_reward: -196.283 [-259.657, -136.672] - loss: 11.918 - mae: 84.098 - mean_q: -111.294 Interval 7621 (3810000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0040 9 episodes - episode_reward: -174.066 [-229.269, -142.474] - loss: 9.837 - mae: 84.069 - mean_q: -111.277 Interval 7622 (3810500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6927 7 episodes - episode_reward: -198.921 [-267.973, -149.879] - loss: 9.297 - mae: 84.067 - mean_q: -111.296 Interval 7623 (3811000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5294 7 episodes - episode_reward: -150.647 [-196.630, -97.136] - loss: 10.522 - mae: 84.071 - mean_q: -111.310 Interval 7624 (3811500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3731 9 episodes - episode_reward: -206.336 [-311.159, -100.000] - loss: 10.340 - mae: 84.070 - mean_q: -111.298 Interval 7625 (3812000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5894 7 episodes - episode_reward: -188.488 [-234.374, -159.470] - loss: 12.241 - mae: 84.074 - mean_q: -111.289 Interval 7626 (3812500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6841 7 episodes - episode_reward: -191.741 [-390.926, -123.649] - loss: 10.532 - mae: 84.066 - mean_q: -111.271 Interval 7627 (3813000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4202 8 episodes - episode_reward: -144.206 [-204.393, -10.037] - loss: 12.352 - mae: 84.034 - mean_q: -111.272 Interval 7628 (3813500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8748 8 episodes - episode_reward: -181.205 [-250.705, -100.000] - loss: 13.584 - mae: 84.016 - mean_q: -111.218 Interval 7629 (3814000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8176 8 episodes - episode_reward: -177.862 [-269.723, -131.435] - loss: 11.279 - mae: 83.965 - mean_q: -111.223 Interval 7630 (3814500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3505 9 episodes - episode_reward: -184.938 [-283.874, -100.000] - loss: 13.536 - mae: 83.952 - mean_q: -111.206 Interval 7631 (3815000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9493 7 episodes - episode_reward: -209.592 [-235.960, -180.379] - loss: 15.699 - mae: 83.911 - mean_q: -111.164 Interval 7632 (3815500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.0130 7 episodes - episode_reward: -142.944 [-256.835, 21.370] - loss: 11.832 - mae: 83.872 - mean_q: -111.119 Interval 7633 (3816000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6937 8 episodes - episode_reward: -170.290 [-221.408, -96.538] - loss: 9.821 - mae: 83.826 - mean_q: -111.107 Interval 7634 (3816500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2874 8 episodes - episode_reward: -308.174 [-627.051, -100.000] - loss: 8.510 - mae: 83.775 - mean_q: -111.085 Interval 7635 (3817000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7007 5 episodes - episode_reward: -390.010 [-715.930, -230.807] - loss: 9.613 - mae: 83.781 - mean_q: -111.091 Interval 7636 (3817500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3706 9 episodes - episode_reward: -200.367 [-316.967, -151.992] - loss: 9.512 - mae: 83.788 - mean_q: -111.092 Interval 7637 (3818000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9973 7 episodes - episode_reward: -139.724 [-214.804, -40.380] - loss: 9.623 - mae: 83.779 - mean_q: -111.095 Interval 7638 (3818500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0673 8 episodes - episode_reward: -182.567 [-269.390, -147.518] - loss: 14.786 - mae: 83.790 - mean_q: -111.076 Interval 7639 (3819000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9390 7 episodes - episode_reward: -210.145 [-344.128, -127.931] - loss: 12.349 - mae: 83.773 - mean_q: -111.043 Interval 7640 (3819500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8736 10 episodes - episode_reward: -150.271 [-245.997, 55.850] - loss: 10.795 - mae: 83.761 - mean_q: -111.027 Interval 7641 (3820000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9033 7 episodes - episode_reward: -198.976 [-362.606, -135.424] - loss: 10.286 - mae: 83.751 - mean_q: -111.014 Interval 7642 (3820500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7688 8 episodes - episode_reward: -182.916 [-287.582, 6.678] - loss: 9.019 - mae: 83.734 - mean_q: -111.051 Interval 7643 (3821000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9325 8 episodes - episode_reward: -180.838 [-259.057, -124.794] - loss: 13.998 - mae: 83.751 - mean_q: -111.072 Interval 7644 (3821500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4380 8 episodes - episode_reward: -153.223 [-276.824, -6.234] - loss: 13.208 - mae: 83.765 - mean_q: -111.045 Interval 7645 (3822000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2833 7 episodes - episode_reward: -231.776 [-445.176, -157.291] - loss: 12.936 - mae: 83.754 - mean_q: -111.012 Interval 7646 (3822500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9807 9 episodes - episode_reward: -169.520 [-352.056, 22.028] - loss: 11.818 - mae: 83.747 - mean_q: -111.000 Interval 7647 (3823000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5255 7 episodes - episode_reward: -108.458 [-187.646, -16.581] - loss: 13.453 - mae: 83.741 - mean_q: -110.987 Interval 7648 (3823500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4728 6 episodes - episode_reward: -196.184 [-254.566, -142.504] - loss: 15.244 - mae: 83.755 - mean_q: -110.959 Interval 7649 (3824000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9257 8 episodes - episode_reward: -190.160 [-236.885, -153.733] - loss: 9.170 - mae: 83.715 - mean_q: -110.963 Interval 7650 (3824500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3410 9 episodes - episode_reward: -185.324 [-338.254, -103.534] - loss: 11.578 - mae: 83.715 - mean_q: -110.963 Interval 7651 (3825000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7180 7 episodes - episode_reward: -173.674 [-371.654, -32.158] - loss: 15.020 - mae: 83.721 - mean_q: -110.953 Interval 7652 (3825500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8258 8 episodes - episode_reward: -188.142 [-272.031, -129.237] - loss: 10.937 - mae: 83.694 - mean_q: -110.928 Interval 7653 (3826000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0648 7 episodes - episode_reward: -212.561 [-309.169, -136.798] - loss: 13.052 - mae: 83.703 - mean_q: -110.923 Interval 7654 (3826500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9445 7 episodes - episode_reward: -214.563 [-271.100, -139.059] - loss: 13.715 - mae: 83.703 - mean_q: -110.936 Interval 7655 (3827000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1147 8 episodes - episode_reward: -203.535 [-279.123, -100.000] - loss: 10.619 - mae: 83.686 - mean_q: -110.923 Interval 7656 (3827500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4492 7 episodes - episode_reward: -166.959 [-290.069, -99.219] - loss: 10.230 - mae: 83.688 - mean_q: -110.938 Interval 7657 (3828000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2656 9 episodes - episode_reward: -179.553 [-279.766, -119.903] - loss: 10.374 - mae: 83.690 - mean_q: -110.942 Interval 7658 (3828500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6897 8 episodes - episode_reward: -177.113 [-223.401, -112.874] - loss: 12.174 - mae: 83.695 - mean_q: -110.938 Interval 7659 (3829000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.0209 7 episodes - episode_reward: -141.983 [-217.187, -57.295] - loss: 10.491 - mae: 83.695 - mean_q: -110.954 Interval 7660 (3829500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9679 6 episodes - episode_reward: -161.600 [-259.132, -9.583] - loss: 14.166 - mae: 83.702 - mean_q: -110.959 Interval 7661 (3830000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7859 9 episodes - episode_reward: -208.749 [-313.565, -100.000] - loss: 10.816 - mae: 83.690 - mean_q: -110.940 Interval 7662 (3830500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0730 8 episodes - episode_reward: -196.552 [-229.071, -149.190] - loss: 11.573 - mae: 83.685 - mean_q: -110.936 Interval 7663 (3831000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7481 7 episodes - episode_reward: -183.810 [-285.565, -100.000] - loss: 11.853 - mae: 83.691 - mean_q: -110.941 Interval 7664 (3831500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6530 8 episodes - episode_reward: -262.218 [-552.422, -146.190] - loss: 12.919 - mae: 83.680 - mean_q: -110.935 Interval 7665 (3832000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.3657 8 episodes - episode_reward: -427.495 [-674.135, -119.932] - loss: 11.522 - mae: 83.677 - mean_q: -110.942 Interval 7666 (3832500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1065 8 episodes - episode_reward: -200.213 [-253.623, -153.357] - loss: 9.190 - mae: 83.693 - mean_q: -110.954 Interval 7667 (3833000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5750 7 episodes - episode_reward: -180.329 [-253.604, -50.738] - loss: 15.929 - mae: 83.753 - mean_q: -110.957 Interval 7668 (3833500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.8064 8 episodes - episode_reward: -112.662 [-215.117, 19.442] - loss: 9.978 - mae: 83.741 - mean_q: -110.953 Interval 7669 (3834000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6967 8 episodes - episode_reward: -170.092 [-208.733, -2.500] - loss: 12.466 - mae: 83.774 - mean_q: -110.961 Interval 7670 (3834500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8239 7 episodes - episode_reward: -196.631 [-231.017, -171.162] - loss: 10.952 - mae: 83.796 - mean_q: -110.961 Interval 7671 (3835000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8487 8 episodes - episode_reward: -189.462 [-227.316, -150.102] - loss: 10.555 - mae: 83.804 - mean_q: -110.971 Interval 7672 (3835500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6872 7 episodes - episode_reward: -184.785 [-233.889, -130.645] - loss: 11.354 - mae: 83.831 - mean_q: -110.975 Interval 7673 (3836000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9861 9 episodes - episode_reward: -170.156 [-283.447, -99.498] - loss: 11.641 - mae: 83.829 - mean_q: -110.976 Interval 7674 (3836500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8271 7 episodes - episode_reward: -132.888 [-216.002, 15.022] - loss: 13.559 - mae: 83.863 - mean_q: -110.969 Interval 7675 (3837000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6314 7 episodes - episode_reward: -187.438 [-252.589, -125.007] - loss: 12.940 - mae: 83.864 - mean_q: -110.970 Interval 7676 (3837500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2766 8 episodes - episode_reward: -197.581 [-259.559, -121.357] - loss: 11.193 - mae: 83.861 - mean_q: -110.958 Interval 7677 (3838000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4136 7 episodes - episode_reward: -171.435 [-254.238, -109.646] - loss: 11.072 - mae: 83.879 - mean_q: -110.946 Interval 7678 (3838500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9669 8 episodes - episode_reward: -194.635 [-274.536, -106.785] - loss: 12.242 - mae: 83.888 - mean_q: -110.951 Interval 7679 (3839000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1063 8 episodes - episode_reward: -186.406 [-358.214, -57.612] - loss: 12.455 - mae: 83.881 - mean_q: -110.937 Interval 7680 (3839500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7008 8 episodes - episode_reward: -173.463 [-245.367, -138.067] - loss: 14.440 - mae: 83.892 - mean_q: -110.932 Interval 7681 (3840000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9701 8 episodes - episode_reward: -178.859 [-216.672, -146.347] - loss: 13.658 - mae: 83.875 - mean_q: -110.922 Interval 7682 (3840500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2622 9 episodes - episode_reward: -179.495 [-274.310, -109.011] - loss: 15.912 - mae: 83.880 - mean_q: -110.904 Interval 7683 (3841000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2060 8 episodes - episode_reward: -207.978 [-352.585, -150.906] - loss: 14.287 - mae: 83.881 - mean_q: -110.904 Interval 7684 (3841500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6684 8 episodes - episode_reward: -164.998 [-220.510, -87.013] - loss: 12.521 - mae: 83.883 - mean_q: -110.891 Interval 7685 (3842000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2429 9 episodes - episode_reward: -167.529 [-271.906, -100.000] - loss: 12.219 - mae: 83.885 - mean_q: -110.889 Interval 7686 (3842500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4582 10 episodes - episode_reward: -186.673 [-280.733, -116.877] - loss: 9.953 - mae: 83.875 - mean_q: -110.891 Interval 7687 (3843000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0770 7 episodes - episode_reward: -217.562 [-282.912, -151.696] - loss: 14.225 - mae: 83.900 - mean_q: -110.876 Interval 7688 (3843500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6830 7 episodes - episode_reward: -183.597 [-243.686, -100.000] - loss: 11.080 - mae: 83.876 - mean_q: -110.886 Interval 7689 (3844000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2461 10 episodes - episode_reward: -171.110 [-251.683, -100.000] - loss: 9.651 - mae: 83.856 - mean_q: -110.907 Interval 7690 (3844500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4160 6 episodes - episode_reward: -182.917 [-240.136, -149.924] - loss: 15.470 - mae: 83.886 - mean_q: -110.911 Interval 7691 (3845000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6014 9 episodes - episode_reward: -204.657 [-292.185, -100.000] - loss: 11.163 - mae: 83.875 - mean_q: -110.905 Interval 7692 (3845500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1117 9 episodes - episode_reward: -180.176 [-294.836, -121.648] - loss: 11.289 - mae: 83.878 - mean_q: -110.920 Interval 7693 (3846000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0459 8 episodes - episode_reward: -185.444 [-241.269, -100.000] - loss: 11.289 - mae: 83.885 - mean_q: -110.940 Interval 7694 (3846500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1830 9 episodes - episode_reward: -176.622 [-225.958, -116.719] - loss: 11.456 - mae: 83.895 - mean_q: -110.977 Interval 7695 (3847000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -6.0230 10 episodes - episode_reward: -305.299 [-498.283, -129.500] - loss: 10.866 - mae: 83.888 - mean_q: -111.003 Interval 7696 (3847500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9087 8 episodes - episode_reward: -241.909 [-412.161, -147.032] - loss: 13.436 - mae: 83.938 - mean_q: -111.024 Interval 7697 (3848000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7814 7 episodes - episode_reward: -193.016 [-264.472, -149.672] - loss: 7.605 - mae: 83.937 - mean_q: -111.074 Interval 7698 (3848500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0219 9 episodes - episode_reward: -164.885 [-204.743, -110.231] - loss: 9.950 - mae: 83.989 - mean_q: -111.111 Interval 7699 (3849000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7172 6 episodes - episode_reward: -226.450 [-287.416, -179.751] - loss: 12.123 - mae: 84.027 - mean_q: -111.140 Interval 7700 (3849500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4201 9 episodes - episode_reward: -192.337 [-258.565, -123.168] - loss: 14.556 - mae: 84.063 - mean_q: -111.135 Interval 7701 (3850000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1323 8 episodes - episode_reward: -198.023 [-247.809, -166.021] - loss: 14.504 - mae: 84.082 - mean_q: -111.140 Interval 7702 (3850500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7255 9 episodes - episode_reward: -145.874 [-192.903, -100.000] - loss: 11.261 - mae: 84.093 - mean_q: -111.157 Interval 7703 (3851000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6253 8 episodes - episode_reward: -173.846 [-207.342, -151.184] - loss: 9.838 - mae: 84.124 - mean_q: -111.184 Interval 7704 (3851500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8124 8 episodes - episode_reward: -170.100 [-221.816, -100.000] - loss: 17.356 - mae: 84.174 - mean_q: -111.172 Interval 7705 (3852000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8504 9 episodes - episode_reward: -165.572 [-227.571, -115.612] - loss: 10.425 - mae: 84.155 - mean_q: -111.177 Interval 7706 (3852500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8249 8 episodes - episode_reward: -177.882 [-237.372, -119.520] - loss: 9.615 - mae: 84.171 - mean_q: -111.208 Interval 7707 (3853000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8742 7 episodes - episode_reward: -203.313 [-376.300, -124.179] - loss: 11.890 - mae: 84.214 - mean_q: -111.244 Interval 7708 (3853500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5399 7 episodes - episode_reward: -177.733 [-240.075, -114.493] - loss: 9.696 - mae: 84.236 - mean_q: -111.288 Interval 7709 (3854000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3415 8 episodes - episode_reward: -150.563 [-225.736, 22.417] - loss: 11.318 - mae: 84.263 - mean_q: -111.306 Interval 7710 (3854500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4377 6 episodes - episode_reward: -200.478 [-309.367, -129.313] - loss: 14.417 - mae: 84.287 - mean_q: -111.317 Interval 7711 (3855000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1082 5 episodes - episode_reward: -292.415 [-497.926, -140.320] - loss: 7.407 - mae: 84.264 - mean_q: -111.320 Interval 7712 (3855500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2938 9 episodes - episode_reward: -305.442 [-531.732, -157.915] - loss: 8.796 - mae: 84.304 - mean_q: -111.358 Interval 7713 (3856000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5251 6 episodes - episode_reward: -184.207 [-223.688, -130.078] - loss: 9.802 - mae: 84.336 - mean_q: -111.407 Interval 7714 (3856500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3191 7 episodes - episode_reward: -248.359 [-412.412, -116.228] - loss: 10.410 - mae: 84.378 - mean_q: -111.445 Interval 7715 (3857000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7586 7 episodes - episode_reward: -195.657 [-255.932, -133.882] - loss: 11.413 - mae: 84.405 - mean_q: -111.470 Interval 7716 (3857500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7273 7 episodes - episode_reward: -205.081 [-330.474, -142.720] - loss: 11.674 - mae: 84.444 - mean_q: -111.490 Interval 7717 (3858000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9704 7 episodes - episode_reward: -218.134 [-299.734, -156.390] - loss: 12.303 - mae: 84.465 - mean_q: -111.519 Interval 7718 (3858500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6511 7 episodes - episode_reward: -175.206 [-201.927, -158.957] - loss: 9.884 - mae: 84.481 - mean_q: -111.542 Interval 7719 (3859000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2149 8 episodes - episode_reward: -210.345 [-304.603, -128.908] - loss: 12.981 - mae: 84.519 - mean_q: -111.569 Interval 7720 (3859500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2801 7 episodes - episode_reward: -156.916 [-183.778, -130.946] - loss: 11.362 - mae: 84.544 - mean_q: -111.564 Interval 7721 (3860000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0384 7 episodes - episode_reward: -204.508 [-390.183, -129.233] - loss: 10.100 - mae: 84.558 - mean_q: -111.591 Interval 7722 (3860500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7249 8 episodes - episode_reward: -181.833 [-219.288, -100.000] - loss: 12.663 - mae: 84.605 - mean_q: -111.605 Interval 7723 (3861000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4199 10 episodes - episode_reward: -167.265 [-245.348, -120.134] - loss: 11.240 - mae: 84.615 - mean_q: -111.616 Interval 7724 (3861500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7236 10 episodes - episode_reward: -138.447 [-295.648, -30.078] - loss: 11.875 - mae: 84.637 - mean_q: -111.636 Interval 7725 (3862000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4160 10 episodes - episode_reward: -172.929 [-236.745, -138.314] - loss: 8.984 - mae: 84.627 - mean_q: -111.647 Interval 7726 (3862500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6354 7 episodes - episode_reward: -183.195 [-341.662, -127.691] - loss: 9.872 - mae: 84.650 - mean_q: -111.675 Interval 7727 (3863000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3820 10 episodes - episode_reward: -174.795 [-220.432, -143.342] - loss: 10.740 - mae: 84.678 - mean_q: -111.725 Interval 7728 (3863500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3831 10 episodes - episode_reward: -173.357 [-253.603, -127.519] - loss: 8.728 - mae: 84.703 - mean_q: -111.770 Interval 7729 (3864000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6542 7 episodes - episode_reward: -180.695 [-249.872, -134.064] - loss: 10.090 - mae: 84.734 - mean_q: -111.814 Interval 7730 (3864500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9078 8 episodes - episode_reward: -182.720 [-258.342, -103.019] - loss: 10.727 - mae: 84.764 - mean_q: -111.834 Interval 7731 (3865000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8421 8 episodes - episode_reward: -174.546 [-227.548, -134.577] - loss: 10.568 - mae: 84.773 - mean_q: -111.853 Interval 7732 (3865500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7319 7 episodes - episode_reward: -197.759 [-250.479, -140.087] - loss: 12.605 - mae: 84.798 - mean_q: -111.897 Interval 7733 (3866000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7234 7 episodes - episode_reward: -191.326 [-386.143, -79.775] - loss: 9.973 - mae: 84.811 - mean_q: -111.925 Interval 7734 (3866500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7417 8 episodes - episode_reward: -174.398 [-210.406, -143.126] - loss: 7.750 - mae: 84.821 - mean_q: -111.983 Interval 7735 (3867000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3094 8 episodes - episode_reward: -143.227 [-238.208, -57.365] - loss: 11.407 - mae: 84.856 - mean_q: -112.021 Interval 7736 (3867500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4788 7 episodes - episode_reward: -177.915 [-239.536, -111.483] - loss: 12.904 - mae: 84.866 - mean_q: -112.024 Interval 7737 (3868000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2425 7 episodes - episode_reward: -156.755 [-208.487, -133.890] - loss: 13.032 - mae: 84.862 - mean_q: -112.010 Interval 7738 (3868500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2206 7 episodes - episode_reward: -166.105 [-260.798, 0.629] - loss: 10.132 - mae: 84.862 - mean_q: -112.025 Interval 7739 (3869000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5058 7 episodes - episode_reward: -173.367 [-210.082, -100.000] - loss: 12.000 - mae: 84.870 - mean_q: -112.052 Interval 7740 (3869500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1348 8 episodes - episode_reward: -175.376 [-198.991, -151.649] - loss: 9.773 - mae: 84.864 - mean_q: -112.059 Interval 7741 (3870000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7900 2 episodes - episode_reward: -698.431 [-885.132, -511.730] - loss: 8.869 - mae: 84.898 - mean_q: -112.071 Interval 7742 (3870500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9233 7 episodes - episode_reward: -437.128 [-1887.835, -177.683] - loss: 11.106 - mae: 84.933 - mean_q: -112.108 Interval 7743 (3871000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4347 10 episodes - episode_reward: -182.460 [-277.573, -109.641] - loss: 9.541 - mae: 84.942 - mean_q: -112.122 Interval 7744 (3871500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3620 7 episodes - episode_reward: -168.195 [-275.292, -54.465] - loss: 9.417 - mae: 84.973 - mean_q: -112.151 Interval 7745 (3872000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5767 8 episodes - episode_reward: -159.979 [-220.236, -101.515] - loss: 11.955 - mae: 85.002 - mean_q: -112.158 Interval 7746 (3872500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9743 8 episodes - episode_reward: -181.265 [-244.982, -139.368] - loss: 9.070 - mae: 85.008 - mean_q: -112.167 Interval 7747 (3873000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7813 8 episodes - episode_reward: -177.667 [-287.961, -118.525] - loss: 7.910 - mae: 85.038 - mean_q: -112.193 Interval 7748 (3873500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8934 8 episodes - episode_reward: -176.590 [-206.138, -84.246] - loss: 10.331 - mae: 85.071 - mean_q: -112.220 Interval 7749 (3874000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8528 8 episodes - episode_reward: -181.925 [-270.507, -100.000] - loss: 9.743 - mae: 85.101 - mean_q: -112.220 Interval 7750 (3874500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9648 7 episodes - episode_reward: -215.711 [-285.011, -145.323] - loss: 9.679 - mae: 85.136 - mean_q: -112.225 Interval 7751 (3875000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8645 6 episodes - episode_reward: -152.511 [-218.909, -69.721] - loss: 10.434 - mae: 85.144 - mean_q: -112.230 Interval 7752 (3875500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6779 7 episodes - episode_reward: -193.893 [-228.450, -159.132] - loss: 9.186 - mae: 85.153 - mean_q: -112.254 Interval 7753 (3876000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6535 5 episodes - episode_reward: -219.291 [-310.391, -176.511] - loss: 9.419 - mae: 85.152 - mean_q: -112.268 Interval 7754 (3876500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8305 8 episodes - episode_reward: -203.167 [-357.406, -113.960] - loss: 8.585 - mae: 85.171 - mean_q: -112.285 Interval 7755 (3877000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0292 9 episodes - episode_reward: -169.251 [-287.976, 4.166] - loss: 8.258 - mae: 85.200 - mean_q: -112.307 Interval 7756 (3877500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8217 8 episodes - episode_reward: -176.792 [-221.278, -122.483] - loss: 9.298 - mae: 85.232 - mean_q: -112.317 Interval 7757 (3878000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7797 8 episodes - episode_reward: -158.527 [-189.424, -114.655] - loss: 8.242 - mae: 85.251 - mean_q: -112.318 Interval 7758 (3878500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2938 10 episodes - episode_reward: -175.338 [-232.071, -112.046] - loss: 10.261 - mae: 85.276 - mean_q: -112.361 Interval 7759 (3879000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7647 8 episodes - episode_reward: -169.852 [-236.206, -100.441] - loss: 7.557 - mae: 85.273 - mean_q: -112.388 Interval 7760 (3879500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9792 7 episodes - episode_reward: -219.067 [-309.357, -139.838] - loss: 9.070 - mae: 85.287 - mean_q: -112.441 Interval 7761 (3880000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9513 7 episodes - episode_reward: -197.630 [-266.582, -100.000] - loss: 10.283 - mae: 85.296 - mean_q: -112.457 Interval 7762 (3880500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5530 8 episodes - episode_reward: -226.947 [-485.113, -135.487] - loss: 8.363 - mae: 85.318 - mean_q: -112.496 Interval 7763 (3881000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0547 7 episodes - episode_reward: -147.274 [-259.986, 12.256] - loss: 9.036 - mae: 85.336 - mean_q: -112.501 Interval 7764 (3881500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3296 6 episodes - episode_reward: -187.374 [-252.994, -142.069] - loss: 8.415 - mae: 85.317 - mean_q: -112.528 Interval 7765 (3882000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2054 9 episodes - episode_reward: -187.346 [-258.767, -111.773] - loss: 11.528 - mae: 85.325 - mean_q: -112.535 Interval 7766 (3882500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4603 8 episodes - episode_reward: -149.530 [-219.817, 14.768] - loss: 10.739 - mae: 85.311 - mean_q: -112.536 Interval 7767 (3883000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8481 7 episodes - episode_reward: -202.154 [-282.776, -109.383] - loss: 10.483 - mae: 85.295 - mean_q: -112.527 Interval 7768 (3883500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5430 7 episodes - episode_reward: -183.560 [-265.795, -62.388] - loss: 8.617 - mae: 85.286 - mean_q: -112.536 Interval 7769 (3884000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8035 7 episodes - episode_reward: -121.712 [-186.000, 40.763] - loss: 9.292 - mae: 85.285 - mean_q: -112.549 Interval 7770 (3884500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1102 8 episodes - episode_reward: -206.014 [-256.033, -112.660] - loss: 7.630 - mae: 85.270 - mean_q: -112.594 Interval 7771 (3885000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3828 8 episodes - episode_reward: -147.040 [-190.278, -31.590] - loss: 10.987 - mae: 85.293 - mean_q: -112.621 Interval 7772 (3885500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4014 9 episodes - episode_reward: -183.548 [-377.244, -100.000] - loss: 8.442 - mae: 85.282 - mean_q: -112.643 Interval 7773 (3886000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1019 7 episodes - episode_reward: -207.488 [-250.447, -168.919] - loss: 10.161 - mae: 85.283 - mean_q: -112.649 Interval 7774 (3886500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2082 9 episodes - episode_reward: -193.369 [-322.088, -109.571] - loss: 11.629 - mae: 85.293 - mean_q: -112.651 Interval 7775 (3887000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4864 8 episodes - episode_reward: -216.068 [-350.791, -110.866] - loss: 9.399 - mae: 85.278 - mean_q: -112.667 Interval 7776 (3887500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2967 7 episodes - episode_reward: -151.877 [-251.058, 15.197] - loss: 11.557 - mae: 85.285 - mean_q: -112.682 Interval 7777 (3888000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9167 8 episodes - episode_reward: -190.098 [-250.222, -144.692] - loss: 9.303 - mae: 85.294 - mean_q: -112.680 Interval 7778 (3888500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0200 8 episodes - episode_reward: -132.464 [-277.125, 58.133] - loss: 8.952 - mae: 85.295 - mean_q: -112.684 Interval 7779 (3889000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5388 7 episodes - episode_reward: -171.166 [-230.245, -108.399] - loss: 8.128 - mae: 85.300 - mean_q: -112.695 Interval 7780 (3889500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1819 9 episodes - episode_reward: -177.712 [-248.250, -102.767] - loss: 11.119 - mae: 85.327 - mean_q: -112.683 Interval 7781 (3890000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6270 8 episodes - episode_reward: -167.519 [-213.008, -73.666] - loss: 11.730 - mae: 85.318 - mean_q: -112.658 Interval 7782 (3890500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5940 6 episodes - episode_reward: -206.855 [-305.394, -141.694] - loss: 8.291 - mae: 85.313 - mean_q: -112.664 Interval 7783 (3891000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3840 8 episodes - episode_reward: -201.292 [-369.897, -100.000] - loss: 7.967 - mae: 85.296 - mean_q: -112.700 Interval 7784 (3891500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1026 7 episodes - episode_reward: -230.836 [-438.635, -127.651] - loss: 8.471 - mae: 85.318 - mean_q: -112.714 Interval 7785 (3892000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6283 8 episodes - episode_reward: -179.926 [-254.922, -138.432] - loss: 12.043 - mae: 85.332 - mean_q: -112.726 Interval 7786 (3892500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3803 7 episodes - episode_reward: -161.598 [-198.634, -133.820] - loss: 10.131 - mae: 85.342 - mean_q: -112.709 Interval 7787 (3893000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2619 9 episodes - episode_reward: -179.003 [-214.360, -100.000] - loss: 8.911 - mae: 85.354 - mean_q: -112.688 Interval 7788 (3893500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1263 10 episodes - episode_reward: -162.618 [-232.374, -91.380] - loss: 11.058 - mae: 85.369 - mean_q: -112.680 Interval 7789 (3894000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2608 8 episodes - episode_reward: -200.633 [-328.272, -120.776] - loss: 9.837 - mae: 85.371 - mean_q: -112.652 Interval 7790 (3894500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9872 8 episodes - episode_reward: -186.219 [-243.129, -127.417] - loss: 13.438 - mae: 85.397 - mean_q: -112.635 Interval 7791 (3895000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4057 9 episodes - episode_reward: -194.180 [-317.902, -80.670] - loss: 9.648 - mae: 85.359 - mean_q: -112.626 Interval 7792 (3895500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3731 9 episodes - episode_reward: -183.623 [-338.988, -110.189] - loss: 7.721 - mae: 85.341 - mean_q: -112.637 Interval 7793 (3896000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5684 8 episodes - episode_reward: -160.870 [-281.327, -42.424] - loss: 8.344 - mae: 85.344 - mean_q: -112.658 Interval 7794 (3896500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.8018 7 episodes - episode_reward: -191.558 [-242.549, -150.299] - loss: 9.188 - mae: 85.343 - mean_q: -112.679 Interval 7795 (3897000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9018 8 episodes - episode_reward: -186.350 [-255.236, -130.464] - loss: 7.899 - mae: 85.309 - mean_q: -112.686 Interval 7796 (3897500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.2135 9 episodes - episode_reward: -339.875 [-784.041, -146.743] - loss: 9.656 - mae: 85.301 - mean_q: -112.731 Interval 7797 (3898000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8650 9 episodes - episode_reward: -166.406 [-237.544, -54.841] - loss: 9.714 - mae: 85.312 - mean_q: -112.739 Interval 7798 (3898500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0061 7 episodes - episode_reward: -215.381 [-371.758, -100.000] - loss: 12.437 - mae: 85.319 - mean_q: -112.732 Interval 7799 (3899000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4231 9 episodes - episode_reward: -188.570 [-259.963, -101.594] - loss: 9.468 - mae: 85.306 - mean_q: -112.710 Interval 7800 (3899500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9455 9 episodes - episode_reward: -165.257 [-218.893, -100.000] - loss: 11.948 - mae: 85.318 - mean_q: -112.710 Interval 7801 (3900000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6298 8 episodes - episode_reward: -159.545 [-208.113, -98.822] - loss: 10.078 - mae: 85.312 - mean_q: -112.686 Interval 7802 (3900500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4336 8 episodes - episode_reward: -148.634 [-247.250, 6.393] - loss: 12.046 - mae: 85.316 - mean_q: -112.686 Interval 7803 (3901000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4799 8 episodes - episode_reward: -224.213 [-267.233, -175.208] - loss: 9.868 - mae: 85.322 - mean_q: -112.691 Interval 7804 (3901500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7846 8 episodes - episode_reward: -176.556 [-248.577, -127.161] - loss: 9.036 - mae: 85.325 - mean_q: -112.698 Interval 7805 (3902000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0574 7 episodes - episode_reward: -210.285 [-311.055, -123.928] - loss: 9.546 - mae: 85.345 - mean_q: -112.731 Interval 7806 (3902500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4114 10 episodes - episode_reward: -177.387 [-282.425, -114.868] - loss: 8.451 - mae: 85.332 - mean_q: -112.744 Interval 7807 (3903000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5231 7 episodes - episode_reward: -174.337 [-219.851, -135.736] - loss: 11.650 - mae: 85.362 - mean_q: -112.759 Interval 7808 (3903500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2187 8 episodes - episode_reward: -194.811 [-297.532, -136.492] - loss: 8.376 - mae: 85.345 - mean_q: -112.763 Interval 7809 (3904000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0986 9 episodes - episode_reward: -181.900 [-277.891, -100.000] - loss: 9.124 - mae: 85.368 - mean_q: -112.792 Interval 7810 (3904500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0141 9 episodes - episode_reward: -157.227 [-195.764, -111.561] - loss: 8.803 - mae: 85.369 - mean_q: -112.828 Interval 7811 (3905000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1126 10 episodes - episode_reward: -164.856 [-241.933, -100.000] - loss: 13.048 - mae: 85.402 - mean_q: -112.830 Interval 7812 (3905500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1773 8 episodes - episode_reward: -200.203 [-316.577, -88.792] - loss: 11.053 - mae: 85.408 - mean_q: -112.803 Interval 7813 (3906000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9390 7 episodes - episode_reward: -202.022 [-316.558, -146.099] - loss: 11.689 - mae: 85.373 - mean_q: -112.808 Interval 7814 (3906500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5797 8 episodes - episode_reward: -159.940 [-294.294, -3.336] - loss: 8.386 - mae: 85.353 - mean_q: -112.830 Interval 7815 (3907000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1709 10 episodes - episode_reward: -164.531 [-248.877, -71.987] - loss: 13.231 - mae: 85.378 - mean_q: -112.836 Interval 7816 (3907500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8379 8 episodes - episode_reward: -174.626 [-265.883, -135.515] - loss: 11.250 - mae: 85.359 - mean_q: -112.822 Interval 7817 (3908000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4966 7 episodes - episode_reward: -181.442 [-251.938, -140.905] - loss: 12.608 - mae: 85.377 - mean_q: -112.803 Interval 7818 (3908500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5919 7 episodes - episode_reward: -176.049 [-220.330, -121.608] - loss: 9.512 - mae: 85.352 - mean_q: -112.792 Interval 7819 (3909000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8962 9 episodes - episode_reward: -168.151 [-232.009, -100.000] - loss: 9.345 - mae: 85.354 - mean_q: -112.808 Interval 7820 (3909500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5954 8 episodes - episode_reward: -163.430 [-238.043, -121.749] - loss: 10.444 - mae: 85.350 - mean_q: -112.818 Interval 7821 (3910000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2259 8 episodes - episode_reward: -197.656 [-355.541, -100.000] - loss: 9.916 - mae: 85.343 - mean_q: -112.821 Interval 7822 (3910500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2835 6 episodes - episode_reward: -182.710 [-210.824, -146.278] - loss: 8.861 - mae: 85.342 - mean_q: -112.822 Interval 7823 (3911000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5317 7 episodes - episode_reward: -183.635 [-310.765, -11.566] - loss: 13.681 - mae: 85.364 - mean_q: -112.831 Interval 7824 (3911500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6120 7 episodes - episode_reward: -181.892 [-234.298, -141.379] - loss: 10.289 - mae: 85.361 - mean_q: -112.804 Interval 7825 (3912000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3499 10 episodes - episode_reward: -172.442 [-220.406, -100.000] - loss: 13.068 - mae: 85.371 - mean_q: -112.795 Interval 7826 (3912500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6345 8 episodes - episode_reward: -161.507 [-208.333, -55.776] - loss: 11.756 - mae: 85.362 - mean_q: -112.753 Interval 7827 (3913000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9049 9 episodes - episode_reward: -166.236 [-225.553, -100.000] - loss: 8.570 - mae: 85.342 - mean_q: -112.760 Interval 7828 (3913500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3563 7 episodes - episode_reward: -167.926 [-243.110, -84.978] - loss: 13.523 - mae: 85.360 - mean_q: -112.751 Interval 7829 (3914000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6318 7 episodes - episode_reward: -192.332 [-271.900, -116.062] - loss: 12.592 - mae: 85.337 - mean_q: -112.714 Interval 7830 (3914500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5524 7 episodes - episode_reward: -180.828 [-202.838, -150.140] - loss: 12.591 - mae: 85.325 - mean_q: -112.682 Interval 7831 (3915000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2951 7 episodes - episode_reward: -161.982 [-204.331, -133.868] - loss: 13.349 - mae: 85.312 - mean_q: -112.666 Interval 7832 (3915500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2063 6 episodes - episode_reward: -178.580 [-220.496, -137.185] - loss: 12.192 - mae: 85.291 - mean_q: -112.634 Interval 7833 (3916000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7986 9 episodes - episode_reward: -158.816 [-208.317, -125.518] - loss: 8.691 - mae: 85.254 - mean_q: -112.619 Interval 7834 (3916500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0135 7 episodes - episode_reward: -209.310 [-274.828, -147.983] - loss: 9.976 - mae: 85.249 - mean_q: -112.634 Interval 7835 (3917000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1106 6 episodes - episode_reward: -183.999 [-242.558, -132.672] - loss: 11.773 - mae: 85.264 - mean_q: -112.629 Interval 7836 (3917500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9999 8 episodes - episode_reward: -185.949 [-269.541, -121.862] - loss: 8.313 - mae: 85.239 - mean_q: -112.618 Interval 7837 (3918000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5911 7 episodes - episode_reward: -193.496 [-350.298, -142.283] - loss: 12.275 - mae: 85.256 - mean_q: -112.615 Interval 7838 (3918500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8579 7 episodes - episode_reward: -202.614 [-284.612, -136.047] - loss: 9.883 - mae: 85.236 - mean_q: -112.619 Interval 7839 (3919000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.7854 9 episodes - episode_reward: -200.454 [-480.474, -100.000] - loss: 9.692 - mae: 85.242 - mean_q: -112.607 Interval 7840 (3919500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7618 7 episodes - episode_reward: -188.822 [-258.121, -76.310] - loss: 10.028 - mae: 85.212 - mean_q: -112.615 Interval 7841 (3920000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5779 12 episodes - episode_reward: -156.878 [-210.191, -100.000] - loss: 12.813 - mae: 85.186 - mean_q: -112.607 Interval 7842 (3920500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0672 7 episodes - episode_reward: -219.751 [-306.161, -139.520] - loss: 13.394 - mae: 85.158 - mean_q: -112.570 Interval 7843 (3921000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0271 5 episodes - episode_reward: -504.331 [-743.272, -129.612] - loss: 10.462 - mae: 85.103 - mean_q: -112.514 Interval 7844 (3921500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7361 3 episodes - episode_reward: -784.406 [-1226.812, -538.577] - loss: 9.318 - mae: 85.088 - mean_q: -112.476 Interval 7845 (3922000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6415 6 episodes - episode_reward: -303.706 [-579.611, -135.201] - loss: 9.623 - mae: 85.126 - mean_q: -112.522 Interval 7846 (3922500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7333 8 episodes - episode_reward: -171.015 [-231.412, -84.700] - loss: 12.114 - mae: 85.143 - mean_q: -112.503 Interval 7847 (3923000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8102 8 episodes - episode_reward: -173.071 [-266.725, -121.995] - loss: 8.784 - mae: 85.149 - mean_q: -112.496 Interval 7848 (3923500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0866 8 episodes - episode_reward: -196.119 [-346.149, -114.366] - loss: 13.778 - mae: 85.190 - mean_q: -112.480 Interval 7849 (3924000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6045 8 episodes - episode_reward: -163.337 [-214.146, -112.276] - loss: 17.578 - mae: 85.230 - mean_q: -112.460 Interval 7850 (3924500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7830 7 episodes - episode_reward: -187.665 [-263.380, -132.295] - loss: 11.570 - mae: 85.211 - mean_q: -112.421 Interval 7851 (3925000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5282 8 episodes - episode_reward: -165.346 [-216.610, -117.250] - loss: 11.116 - mae: 85.217 - mean_q: -112.434 Interval 7852 (3925500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9602 7 episodes - episode_reward: -142.953 [-234.670, 86.666] - loss: 12.669 - mae: 85.243 - mean_q: -112.431 Interval 7853 (3926000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2889 9 episodes - episode_reward: -179.545 [-302.286, -100.000] - loss: 10.541 - mae: 85.221 - mean_q: -112.415 Interval 7854 (3926500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6823 8 episodes - episode_reward: -161.003 [-209.810, -118.381] - loss: 10.846 - mae: 85.211 - mean_q: -112.414 Interval 7855 (3927000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2415 8 episodes - episode_reward: -204.867 [-268.839, -165.734] - loss: 13.295 - mae: 85.240 - mean_q: -112.404 Interval 7856 (3927500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9042 8 episodes - episode_reward: -189.857 [-292.620, -126.237] - loss: 11.648 - mae: 85.235 - mean_q: -112.372 Interval 7857 (3928000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9337 7 episodes - episode_reward: -202.590 [-234.212, -168.572] - loss: 11.246 - mae: 85.248 - mean_q: -112.378 Interval 7858 (3928500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9767 9 episodes - episode_reward: -158.238 [-210.222, -86.611] - loss: 10.892 - mae: 85.253 - mean_q: -112.386 Interval 7859 (3929000 steps performed) 500/500 [==============================] - ETA: 0s - reward: -2.72 - 2s 4ms/step - reward: -2.7044 8 episodes - episode_reward: -177.097 [-276.962, -108.675] - loss: 12.298 - mae: 85.245 - mean_q: -112.382 Interval 7860 (3929500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6597 7 episodes - episode_reward: -193.041 [-268.874, -134.846] - loss: 13.520 - mae: 85.229 - mean_q: -112.352 Interval 7861 (3930000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8488 8 episodes - episode_reward: -174.159 [-254.846, -118.460] - loss: 12.803 - mae: 85.217 - mean_q: -112.322 Interval 7862 (3930500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7195 9 episodes - episode_reward: -149.728 [-231.954, -100.000] - loss: 8.484 - mae: 85.191 - mean_q: -112.311 Interval 7863 (3931000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5537 7 episodes - episode_reward: -185.868 [-249.587, -134.990] - loss: 13.130 - mae: 85.193 - mean_q: -112.278 Interval 7864 (3931500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4662 7 episodes - episode_reward: -171.469 [-206.741, -136.771] - loss: 11.818 - mae: 85.176 - mean_q: -112.258 Interval 7865 (3932000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7061 9 episodes - episode_reward: -212.580 [-504.355, -100.000] - loss: 11.637 - mae: 85.166 - mean_q: -112.251 Interval 7866 (3932500 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -2.5413 7 episodes - episode_reward: -175.218 [-228.904, -148.888] - loss: 12.955 - mae: 85.159 - mean_q: -112.219 Interval 7867 (3933000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3367 9 episodes - episode_reward: -177.730 [-219.200, -111.198] - loss: 11.001 - mae: 85.123 - mean_q: -112.233 Interval 7868 (3933500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5669 8 episodes - episode_reward: -172.757 [-235.785, -139.404] - loss: 9.136 - mae: 85.116 - mean_q: -112.258 Interval 7869 (3934000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3415 6 episodes - episode_reward: -203.676 [-246.976, -169.496] - loss: 11.565 - mae: 85.116 - mean_q: -112.276 Interval 7870 (3934500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5919 6 episodes - episode_reward: -207.000 [-261.776, -162.662] - loss: 14.361 - mae: 85.131 - mean_q: -112.252 Interval 7871 (3935000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9679 8 episodes - episode_reward: -189.594 [-255.218, -128.644] - loss: 9.779 - mae: 85.103 - mean_q: -112.246 Interval 7872 (3935500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0089 9 episodes - episode_reward: -166.687 [-225.548, -100.000] - loss: 12.988 - mae: 85.109 - mean_q: -112.250 Interval 7873 (3936000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5059 7 episodes - episode_reward: -164.024 [-210.770, -136.975] - loss: 12.658 - mae: 85.119 - mean_q: -112.232 Interval 7874 (3936500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5580 8 episodes - episode_reward: -171.041 [-242.793, 37.078] - loss: 13.470 - mae: 85.108 - mean_q: -112.240 Interval 7875 (3937000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5351 7 episodes - episode_reward: -183.521 [-300.984, -105.196] - loss: 11.008 - mae: 85.096 - mean_q: -112.228 Interval 7876 (3937500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8295 6 episodes - episode_reward: -144.717 [-193.011, -36.757] - loss: 11.245 - mae: 85.107 - mean_q: -112.225 Interval 7877 (3938000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3403 8 episodes - episode_reward: -211.029 [-302.908, -142.707] - loss: 13.544 - mae: 85.116 - mean_q: -112.202 Interval 7878 (3938500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8860 8 episodes - episode_reward: -182.323 [-238.090, -141.162] - loss: 11.796 - mae: 85.086 - mean_q: -112.194 Interval 7879 (3939000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8804 8 episodes - episode_reward: -178.212 [-264.391, -116.406] - loss: 12.650 - mae: 85.070 - mean_q: -112.181 Interval 7880 (3939500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3054 6 episodes - episode_reward: -190.151 [-277.890, -18.042] - loss: 12.108 - mae: 85.074 - mean_q: -112.162 Interval 7881 (3940000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9484 8 episodes - episode_reward: -176.895 [-263.587, -77.956] - loss: 9.344 - mae: 85.068 - mean_q: -112.160 Interval 7882 (3940500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2333 8 episodes - episode_reward: -209.510 [-267.790, -160.405] - loss: 9.180 - mae: 85.056 - mean_q: -112.173 Interval 7883 (3941000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2882 6 episodes - episode_reward: -179.211 [-264.091, -119.416] - loss: 13.343 - mae: 85.064 - mean_q: -112.156 Interval 7884 (3941500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2320 7 episodes - episode_reward: -174.849 [-234.386, -137.903] - loss: 11.487 - mae: 85.037 - mean_q: -112.147 Interval 7885 (3942000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1108 8 episodes - episode_reward: -146.724 [-246.036, 76.542] - loss: 10.669 - mae: 85.026 - mean_q: -112.138 Interval 7886 (3942500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0437 9 episodes - episode_reward: -151.142 [-235.066, -6.539] - loss: 10.477 - mae: 85.019 - mean_q: -112.137 Interval 7887 (3943000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9322 9 episodes - episode_reward: -164.304 [-257.971, -100.000] - loss: 10.647 - mae: 85.010 - mean_q: -112.161 Interval 7888 (3943500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0377 9 episodes - episode_reward: -172.398 [-433.832, 37.218] - loss: 12.388 - mae: 85.018 - mean_q: -112.135 Interval 7889 (3944000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4133 7 episodes - episode_reward: -157.139 [-250.057, -96.002] - loss: 8.150 - mae: 84.994 - mean_q: -112.137 Interval 7890 (3944500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5551 9 episodes - episode_reward: -151.803 [-212.822, -15.127] - loss: 9.112 - mae: 85.010 - mean_q: -112.163 Interval 7891 (3945000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1946 8 episodes - episode_reward: -201.265 [-259.375, -100.000] - loss: 13.148 - mae: 85.024 - mean_q: -112.137 Interval 7892 (3945500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4205 7 episodes - episode_reward: -167.322 [-217.629, -138.165] - loss: 13.058 - mae: 85.019 - mean_q: -112.101 Interval 7893 (3946000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9269 9 episodes - episode_reward: -163.190 [-282.945, -115.145] - loss: 11.131 - mae: 85.008 - mean_q: -112.075 Interval 7894 (3946500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1957 10 episodes - episode_reward: -163.451 [-218.965, -100.000] - loss: 12.679 - mae: 84.995 - mean_q: -112.046 Interval 7895 (3947000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3539 6 episodes - episode_reward: -194.449 [-225.160, -149.996] - loss: 11.029 - mae: 84.956 - mean_q: -112.042 Interval 7896 (3947500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1531 7 episodes - episode_reward: -223.596 [-337.035, -144.263] - loss: 9.844 - mae: 84.917 - mean_q: -112.053 Interval 7897 (3948000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5813 7 episodes - episode_reward: -172.900 [-213.722, -129.713] - loss: 12.799 - mae: 84.900 - mean_q: -112.035 Interval 7898 (3948500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1146 7 episodes - episode_reward: -160.146 [-188.204, -92.295] - loss: 11.290 - mae: 84.862 - mean_q: -112.016 Interval 7899 (3949000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8231 7 episodes - episode_reward: -192.269 [-295.786, -131.737] - loss: 7.632 - mae: 84.812 - mean_q: -112.009 Interval 7900 (3949500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4793 7 episodes - episode_reward: -188.229 [-281.766, -88.336] - loss: 9.590 - mae: 84.802 - mean_q: -112.023 Interval 7901 (3950000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6188 8 episodes - episode_reward: -165.332 [-282.005, -115.100] - loss: 10.124 - mae: 84.794 - mean_q: -112.015 Interval 7902 (3950500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2735 8 episodes - episode_reward: -196.596 [-340.771, -105.376] - loss: 10.315 - mae: 84.758 - mean_q: -112.012 Interval 7903 (3951000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9320 7 episodes - episode_reward: -151.220 [-233.315, 9.747] - loss: 12.426 - mae: 84.738 - mean_q: -111.991 Interval 7904 (3951500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7067 6 episodes - episode_reward: -133.121 [-232.147, 28.575] - loss: 11.804 - mae: 84.724 - mean_q: -111.975 Interval 7905 (3952000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3614 7 episodes - episode_reward: -150.926 [-258.628, 22.958] - loss: 10.677 - mae: 84.698 - mean_q: -111.953 Interval 7906 (3952500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1685 9 episodes - episode_reward: -183.381 [-265.389, -141.064] - loss: 13.085 - mae: 84.684 - mean_q: -111.945 Interval 7907 (3953000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2836 8 episodes - episode_reward: -210.635 [-458.480, -100.000] - loss: 13.466 - mae: 84.664 - mean_q: -111.918 Interval 7908 (3953500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6742 11 episodes - episode_reward: -170.721 [-231.314, -100.000] - loss: 12.356 - mae: 84.640 - mean_q: -111.874 Interval 7909 (3954000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0549 8 episodes - episode_reward: -187.100 [-252.502, -141.797] - loss: 8.240 - mae: 84.594 - mean_q: -111.853 Interval 7910 (3954500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0067 7 episodes - episode_reward: -212.682 [-345.812, -144.705] - loss: 17.722 - mae: 84.629 - mean_q: -111.806 Interval 7911 (3955000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9487 8 episodes - episode_reward: -190.162 [-301.127, -107.752] - loss: 11.169 - mae: 84.574 - mean_q: -111.778 Interval 7912 (3955500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5687 7 episodes - episode_reward: -182.097 [-215.936, -133.275] - loss: 12.079 - mae: 84.547 - mean_q: -111.773 Interval 7913 (3956000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5803 7 episodes - episode_reward: -186.814 [-276.713, -131.658] - loss: 7.890 - mae: 84.513 - mean_q: -111.755 Interval 7914 (3956500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5246 6 episodes - episode_reward: -195.586 [-212.761, -158.526] - loss: 11.948 - mae: 84.520 - mean_q: -111.760 Interval 7915 (3957000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9575 9 episodes - episode_reward: -170.075 [-227.677, -117.081] - loss: 12.401 - mae: 84.508 - mean_q: -111.753 Interval 7916 (3957500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7966 6 episodes - episode_reward: -144.383 [-267.649, 17.435] - loss: 12.831 - mae: 84.500 - mean_q: -111.720 Interval 7917 (3958000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5963 8 episodes - episode_reward: -169.774 [-200.503, -140.057] - loss: 10.669 - mae: 84.480 - mean_q: -111.706 Interval 7918 (3958500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7915 8 episodes - episode_reward: -173.649 [-225.553, -125.755] - loss: 13.902 - mae: 84.459 - mean_q: -111.681 Interval 7919 (3959000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2276 7 episodes - episode_reward: -162.704 [-235.312, -85.713] - loss: 9.386 - mae: 84.441 - mean_q: -111.689 Interval 7920 (3959500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1497 9 episodes - episode_reward: -174.560 [-227.806, -100.000] - loss: 12.277 - mae: 84.429 - mean_q: -111.675 Interval 7921 (3960000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9100 9 episodes - episode_reward: -158.833 [-215.784, -100.000] - loss: 12.513 - mae: 84.421 - mean_q: -111.645 Interval 7922 (3960500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8851 9 episodes - episode_reward: -159.556 [-234.745, -100.000] - loss: 9.920 - mae: 84.395 - mean_q: -111.645 Interval 7923 (3961000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6257 7 episodes - episode_reward: -184.200 [-233.562, -110.547] - loss: 10.821 - mae: 84.390 - mean_q: -111.635 Interval 7924 (3961500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9394 9 episodes - episode_reward: -166.641 [-217.555, -10.097] - loss: 10.227 - mae: 84.384 - mean_q: -111.636 Interval 7925 (3962000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0515 9 episodes - episode_reward: -167.554 [-262.375, -100.000] - loss: 10.841 - mae: 84.383 - mean_q: -111.620 Interval 7926 (3962500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2246 7 episodes - episode_reward: -165.105 [-217.934, -93.747] - loss: 9.337 - mae: 84.360 - mean_q: -111.609 Interval 7927 (3963000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4780 7 episodes - episode_reward: -167.866 [-290.301, -108.820] - loss: 8.855 - mae: 84.344 - mean_q: -111.592 Interval 7928 (3963500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3454 8 episodes - episode_reward: -204.216 [-286.026, -155.542] - loss: 11.775 - mae: 84.345 - mean_q: -111.559 Interval 7929 (3964000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4646 7 episodes - episode_reward: -185.479 [-268.116, -122.564] - loss: 11.123 - mae: 84.322 - mean_q: -111.552 Interval 7930 (3964500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9803 9 episodes - episode_reward: -167.831 [-191.899, -137.555] - loss: 12.277 - mae: 84.327 - mean_q: -111.561 Interval 7931 (3965000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4130 6 episodes - episode_reward: -193.575 [-239.727, -163.048] - loss: 9.524 - mae: 84.304 - mean_q: -111.527 Interval 7932 (3965500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1624 9 episodes - episode_reward: -179.810 [-312.903, -100.000] - loss: 11.722 - mae: 84.286 - mean_q: -111.547 Interval 7933 (3966000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2535 8 episodes - episode_reward: -143.078 [-174.402, -51.124] - loss: 11.259 - mae: 84.282 - mean_q: -111.538 Interval 7934 (3966500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2093 7 episodes - episode_reward: -181.957 [-327.160, -147.254] - loss: 10.644 - mae: 84.273 - mean_q: -111.553 Interval 7935 (3967000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.5016 8 episodes - episode_reward: -425.804 [-902.896, -177.623] - loss: 11.193 - mae: 84.274 - mean_q: -111.541 Interval 7936 (3967500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1313 8 episodes - episode_reward: -211.863 [-341.976, -133.258] - loss: 13.034 - mae: 84.286 - mean_q: -111.535 Interval 7937 (3968000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1052 8 episodes - episode_reward: -199.263 [-296.043, -119.884] - loss: 11.220 - mae: 84.286 - mean_q: -111.511 Interval 7938 (3968500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8406 7 episodes - episode_reward: -193.359 [-257.732, -145.397] - loss: 13.620 - mae: 84.302 - mean_q: -111.481 Interval 7939 (3969000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1874 7 episodes - episode_reward: -238.244 [-379.337, -150.869] - loss: 12.935 - mae: 84.297 - mean_q: -111.455 Interval 7940 (3969500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3908 7 episodes - episode_reward: -224.544 [-458.499, -127.492] - loss: 11.061 - mae: 84.304 - mean_q: -111.440 Interval 7941 (3970000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3271 8 episodes - episode_reward: -211.781 [-282.552, -139.645] - loss: 10.512 - mae: 84.298 - mean_q: -111.443 Interval 7942 (3970500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4488 7 episodes - episode_reward: -187.587 [-256.619, -84.721] - loss: 16.288 - mae: 84.318 - mean_q: -111.415 Interval 7943 (3971000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1603 9 episodes - episode_reward: -172.646 [-234.320, -100.000] - loss: 11.336 - mae: 84.271 - mean_q: -111.390 Interval 7944 (3971500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1830 8 episodes - episode_reward: -188.590 [-310.476, -149.527] - loss: 12.073 - mae: 84.245 - mean_q: -111.380 Interval 7945 (3972000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1324 8 episodes - episode_reward: -194.017 [-248.994, -131.937] - loss: 10.856 - mae: 84.213 - mean_q: -111.381 Interval 7946 (3972500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8226 9 episodes - episode_reward: -224.268 [-310.522, -137.689] - loss: 9.633 - mae: 84.177 - mean_q: -111.396 Interval 7947 (3973000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8378 7 episodes - episode_reward: -192.232 [-278.916, -133.107] - loss: 13.049 - mae: 84.180 - mean_q: -111.407 Interval 7948 (3973500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8891 9 episodes - episode_reward: -163.915 [-226.333, -100.000] - loss: 12.216 - mae: 84.158 - mean_q: -111.409 Interval 7949 (3974000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4593 7 episodes - episode_reward: -179.923 [-223.584, -146.374] - loss: 8.970 - mae: 84.113 - mean_q: -111.428 Interval 7950 (3974500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.6800 6 episodes - episode_reward: -396.414 [-642.107, -73.228] - loss: 12.485 - mae: 84.127 - mean_q: -111.396 Interval 7951 (3975000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8583 5 episodes - episode_reward: -563.440 [-1465.661, -198.180] - loss: 10.776 - mae: 84.129 - mean_q: -111.395 Interval 7952 (3975500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0313 9 episodes - episode_reward: -176.082 [-218.520, -86.334] - loss: 12.742 - mae: 84.172 - mean_q: -111.401 Interval 7953 (3976000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1821 8 episodes - episode_reward: -191.834 [-238.541, -113.200] - loss: 10.791 - mae: 84.202 - mean_q: -111.394 Interval 7954 (3976500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8386 7 episodes - episode_reward: -211.972 [-443.005, -136.878] - loss: 13.965 - mae: 84.198 - mean_q: -111.412 Interval 7955 (3977000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6071 8 episodes - episode_reward: -166.708 [-253.491, -57.797] - loss: 12.604 - mae: 84.208 - mean_q: -111.426 Interval 7956 (3977500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.7616 5 episodes - episode_reward: -162.826 [-332.493, 13.661] - loss: 13.073 - mae: 84.219 - mean_q: -111.418 Interval 7957 (3978000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6650 7 episodes - episode_reward: -190.693 [-288.964, -140.431] - loss: 12.297 - mae: 84.237 - mean_q: -111.433 Interval 7958 (3978500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4144 9 episodes - episode_reward: -197.091 [-306.265, -111.029] - loss: 10.450 - mae: 84.244 - mean_q: -111.440 Interval 7959 (3979000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9946 7 episodes - episode_reward: -212.046 [-281.718, -171.165] - loss: 13.253 - mae: 84.271 - mean_q: -111.444 Interval 7960 (3979500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5325 7 episodes - episode_reward: -172.702 [-278.312, -72.758] - loss: 9.134 - mae: 84.252 - mean_q: -111.463 Interval 7961 (3980000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5195 7 episodes - episode_reward: -181.109 [-257.473, -81.362] - loss: 12.890 - mae: 84.292 - mean_q: -111.466 Interval 7962 (3980500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0575 8 episodes - episode_reward: -189.722 [-212.884, -134.553] - loss: 11.644 - mae: 84.288 - mean_q: -111.471 Interval 7963 (3981000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2319 10 episodes - episode_reward: -166.820 [-268.503, -100.000] - loss: 12.003 - mae: 84.311 - mean_q: -111.495 Interval 7964 (3981500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1966 7 episodes - episode_reward: -154.086 [-219.877, -11.147] - loss: 12.294 - mae: 84.332 - mean_q: -111.493 Interval 7965 (3982000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3026 6 episodes - episode_reward: -192.566 [-286.242, -143.864] - loss: 15.231 - mae: 84.346 - mean_q: -111.482 Interval 7966 (3982500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0209 10 episodes - episode_reward: -154.579 [-186.764, -116.043] - loss: 10.622 - mae: 84.340 - mean_q: -111.470 Interval 7967 (3983000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0143 6 episodes - episode_reward: -164.223 [-290.464, 19.135] - loss: 11.032 - mae: 84.341 - mean_q: -111.501 Interval 7968 (3983500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4583 8 episodes - episode_reward: -152.372 [-238.529, -76.962] - loss: 7.962 - mae: 84.345 - mean_q: -111.529 Interval 7969 (3984000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7430 7 episodes - episode_reward: -194.031 [-245.090, -109.568] - loss: 11.796 - mae: 84.357 - mean_q: -111.564 Interval 7970 (3984500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9666 8 episodes - episode_reward: -180.544 [-241.767, -132.858] - loss: 10.183 - mae: 84.380 - mean_q: -111.569 Interval 7971 (3985000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4168 7 episodes - episode_reward: -175.111 [-217.520, -122.606] - loss: 12.830 - mae: 84.394 - mean_q: -111.566 Interval 7972 (3985500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6248 8 episodes - episode_reward: -231.116 [-373.842, -175.570] - loss: 11.889 - mae: 84.397 - mean_q: -111.580 Interval 7973 (3986000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7692 8 episodes - episode_reward: -170.108 [-226.602, -31.969] - loss: 9.859 - mae: 84.404 - mean_q: -111.588 Interval 7974 (3986500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.9317 8 episodes - episode_reward: -250.186 [-505.916, -100.000] - loss: 16.072 - mae: 84.437 - mean_q: -111.589 Interval 7975 (3987000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0400 7 episodes - episode_reward: -196.161 [-249.268, -125.379] - loss: 14.992 - mae: 84.451 - mean_q: -111.575 Interval 7976 (3987500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2022 9 episodes - episode_reward: -193.055 [-262.176, -116.140] - loss: 10.784 - mae: 84.439 - mean_q: -111.548 Interval 7977 (3988000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6265 7 episodes - episode_reward: -119.019 [-161.173, -45.816] - loss: 13.172 - mae: 84.455 - mean_q: -111.570 Interval 7978 (3988500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0328 7 episodes - episode_reward: -212.456 [-294.184, -172.057] - loss: 15.206 - mae: 84.459 - mean_q: -111.573 Interval 7979 (3989000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4124 10 episodes - episode_reward: -173.086 [-272.972, -100.000] - loss: 11.378 - mae: 84.449 - mean_q: -111.565 Interval 7980 (3989500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7962 7 episodes - episode_reward: -192.320 [-243.903, -133.957] - loss: 15.597 - mae: 84.467 - mean_q: -111.546 Interval 7981 (3990000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1384 9 episodes - episode_reward: -181.177 [-251.347, -109.044] - loss: 11.359 - mae: 84.429 - mean_q: -111.556 Interval 7982 (3990500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6829 8 episodes - episode_reward: -168.352 [-221.198, -115.846] - loss: 11.913 - mae: 84.432 - mean_q: -111.551 Interval 7983 (3991000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4568 7 episodes - episode_reward: -169.562 [-191.073, -130.731] - loss: 13.864 - mae: 84.430 - mean_q: -111.530 Interval 7984 (3991500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4871 6 episodes - episode_reward: -204.803 [-276.647, -155.475] - loss: 9.949 - mae: 84.418 - mean_q: -111.529 Interval 7985 (3992000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7501 6 episodes - episode_reward: -227.925 [-354.257, -174.386] - loss: 9.602 - mae: 84.439 - mean_q: -111.557 Interval 7986 (3992500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7071 7 episodes - episode_reward: -191.613 [-264.668, -131.349] - loss: 11.287 - mae: 84.452 - mean_q: -111.560 Interval 7987 (3993000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2020 9 episodes - episode_reward: -181.986 [-275.447, -118.268] - loss: 8.669 - mae: 84.472 - mean_q: -111.573 Interval 7988 (3993500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6237 7 episodes - episode_reward: -178.314 [-226.024, -126.409] - loss: 11.505 - mae: 84.491 - mean_q: -111.594 Interval 7989 (3994000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3118 8 episodes - episode_reward: -213.040 [-336.854, -143.812] - loss: 12.397 - mae: 84.494 - mean_q: -111.607 Interval 7990 (3994500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9987 9 episodes - episode_reward: -168.479 [-214.744, -111.657] - loss: 11.294 - mae: 84.493 - mean_q: -111.616 Interval 7991 (3995000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9311 7 episodes - episode_reward: -205.661 [-281.136, -141.950] - loss: 12.406 - mae: 84.511 - mean_q: -111.629 Interval 7992 (3995500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6274 8 episodes - episode_reward: -161.898 [-258.762, -34.106] - loss: 10.096 - mae: 84.507 - mean_q: -111.643 Interval 7993 (3996000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3129 10 episodes - episode_reward: -173.814 [-247.234, -100.000] - loss: 11.703 - mae: 84.525 - mean_q: -111.653 Interval 7994 (3996500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3787 6 episodes - episode_reward: -193.311 [-241.873, -156.920] - loss: 12.535 - mae: 84.542 - mean_q: -111.651 Interval 7995 (3997000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3106 9 episodes - episode_reward: -187.952 [-251.489, -135.132] - loss: 14.683 - mae: 84.547 - mean_q: -111.652 Interval 7996 (3997500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0135 8 episodes - episode_reward: -187.540 [-235.425, -127.502] - loss: 11.382 - mae: 84.536 - mean_q: -111.656 Interval 7997 (3998000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3225 8 episodes - episode_reward: -201.749 [-319.855, -108.751] - loss: 12.564 - mae: 84.566 - mean_q: -111.664 Interval 7998 (3998500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9133 8 episodes - episode_reward: -189.085 [-377.634, -119.877] - loss: 10.155 - mae: 84.565 - mean_q: -111.703 Interval 7999 (3999000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4906 7 episodes - episode_reward: -170.360 [-234.117, -132.446] - loss: 12.959 - mae: 84.575 - mean_q: -111.740 Interval 8000 (3999500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6779 9 episodes - episode_reward: -153.314 [-215.923, -100.000] - loss: 11.733 - mae: 84.573 - mean_q: -111.767 Interval 8001 (4000000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.8890 8 episodes - episode_reward: -172.331 [-236.085, -100.000] - loss: 13.590 - mae: 84.586 - mean_q: -111.791 Interval 8002 (4000500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2285 9 episodes - episode_reward: -190.058 [-275.214, -100.000] - loss: 11.205 - mae: 84.573 - mean_q: -111.817 Interval 8003 (4001000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4692 9 episodes - episode_reward: -190.681 [-379.947, -110.451] - loss: 13.380 - mae: 84.591 - mean_q: -111.823 Interval 8004 (4001500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7578 8 episodes - episode_reward: -165.617 [-224.625, -77.536] - loss: 11.549 - mae: 84.606 - mean_q: -111.855 Interval 8005 (4002000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4412 7 episodes - episode_reward: -176.853 [-204.383, -126.809] - loss: 9.584 - mae: 84.616 - mean_q: -111.885 Interval 8006 (4002500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8106 7 episodes - episode_reward: -175.559 [-363.928, -5.451] - loss: 8.251 - mae: 84.622 - mean_q: -111.934 Interval 8007 (4003000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5957 10 episodes - episode_reward: -196.964 [-353.736, -100.000] - loss: 10.282 - mae: 84.648 - mean_q: -111.970 Interval 8008 (4003500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4264 9 episodes - episode_reward: -184.702 [-253.565, -121.005] - loss: 11.238 - mae: 84.677 - mean_q: -112.004 Interval 8009 (4004000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1751 9 episodes - episode_reward: -182.640 [-250.145, -114.216] - loss: 11.720 - mae: 84.679 - mean_q: -112.027 Interval 8010 (4004500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4806 6 episodes - episode_reward: -202.705 [-265.324, -141.410] - loss: 13.192 - mae: 84.696 - mean_q: -112.051 Interval 8011 (4005000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2826 8 episodes - episode_reward: -210.857 [-321.247, -146.328] - loss: 10.139 - mae: 84.697 - mean_q: -112.048 Interval 8012 (4005500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6379 6 episodes - episode_reward: -198.979 [-329.870, -110.704] - loss: 10.709 - mae: 84.705 - mean_q: -112.090 Interval 8013 (4006000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5792 8 episodes - episode_reward: -166.063 [-278.151, -100.000] - loss: 9.081 - mae: 84.707 - mean_q: -112.110 Interval 8014 (4006500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9267 9 episodes - episode_reward: -168.115 [-228.138, -100.000] - loss: 10.951 - mae: 84.730 - mean_q: -112.121 Interval 8015 (4007000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9982 8 episodes - episode_reward: -190.973 [-234.939, -138.621] - loss: 13.124 - mae: 84.771 - mean_q: -112.115 Interval 8016 (4007500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3095 10 episodes - episode_reward: -162.482 [-259.471, -108.413] - loss: 10.960 - mae: 84.785 - mean_q: -112.121 Interval 8017 (4008000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8415 8 episodes - episode_reward: -170.704 [-205.916, -145.983] - loss: 10.750 - mae: 84.795 - mean_q: -112.164 Interval 8018 (4008500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9123 7 episodes - episode_reward: -351.226 [-1014.740, -195.766] - loss: 8.935 - mae: 84.815 - mean_q: -112.204 Interval 8019 (4009000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.7957 8 episodes - episode_reward: -244.305 [-425.851, -163.674] - loss: 11.883 - mae: 84.872 - mean_q: -112.240 Interval 8020 (4009500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9292 7 episodes - episode_reward: -145.291 [-228.065, -7.754] - loss: 11.128 - mae: 84.906 - mean_q: -112.279 Interval 8021 (4010000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6090 7 episodes - episode_reward: -184.510 [-227.900, -148.980] - loss: 10.868 - mae: 84.947 - mean_q: -112.332 Interval 8022 (4010500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2910 9 episodes - episode_reward: -178.203 [-299.942, -100.000] - loss: 11.206 - mae: 84.973 - mean_q: -112.384 Interval 8023 (4011000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0938 9 episodes - episode_reward: -171.836 [-229.263, -50.018] - loss: 11.333 - mae: 85.023 - mean_q: -112.399 Interval 8024 (4011500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3985 7 episodes - episode_reward: -177.599 [-223.018, -129.169] - loss: 7.736 - mae: 85.031 - mean_q: -112.463 Interval 8025 (4012000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8787 8 episodes - episode_reward: -169.093 [-214.733, -134.942] - loss: 10.238 - mae: 85.089 - mean_q: -112.545 Interval 8026 (4012500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1573 9 episodes - episode_reward: -187.955 [-254.008, -118.361] - loss: 10.318 - mae: 85.112 - mean_q: -112.594 Interval 8027 (4013000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1183 6 episodes - episode_reward: -163.416 [-270.333, -106.644] - loss: 14.773 - mae: 85.179 - mean_q: -112.631 Interval 8028 (4013500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -2.6191 7 episodes - episode_reward: -192.066 [-345.326, -28.174] - loss: 11.770 - mae: 85.211 - mean_q: -112.668 Interval 8029 (4014000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7902 8 episodes - episode_reward: -171.951 [-222.559, -136.703] - loss: 9.982 - mae: 85.223 - mean_q: -112.724 Interval 8030 (4014500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3825 8 episodes - episode_reward: -151.392 [-228.608, -40.040] - loss: 13.030 - mae: 85.284 - mean_q: -112.755 Interval 8031 (4015000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1382 7 episodes - episode_reward: -145.344 [-237.431, 14.737] - loss: 9.836 - mae: 85.329 - mean_q: -112.786 Interval 8032 (4015500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1216 9 episodes - episode_reward: -179.111 [-357.424, -63.308] - loss: 11.788 - mae: 85.370 - mean_q: -112.833 Interval 8033 (4016000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2174 8 episodes - episode_reward: -199.095 [-286.348, -130.207] - loss: 13.116 - mae: 85.401 - mean_q: -112.853 Interval 8034 (4016500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8743 8 episodes - episode_reward: -174.847 [-253.974, -100.000] - loss: 12.324 - mae: 85.419 - mean_q: -112.856 Interval 8035 (4017000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1874 8 episodes - episode_reward: -204.547 [-275.962, -143.079] - loss: 12.732 - mae: 85.422 - mean_q: -112.878 Interval 8036 (4017500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2146 6 episodes - episode_reward: -181.000 [-228.921, -120.105] - loss: 14.227 - mae: 85.436 - mean_q: -112.899 Interval 8037 (4018000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2444 10 episodes - episode_reward: -168.325 [-243.041, -113.779] - loss: 8.307 - mae: 85.425 - mean_q: -112.897 Interval 8038 (4018500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1883 8 episodes - episode_reward: -187.827 [-265.941, -87.030] - loss: 13.972 - mae: 85.462 - mean_q: -112.895 Interval 8039 (4019000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8588 7 episodes - episode_reward: -205.283 [-268.641, -157.928] - loss: 11.234 - mae: 85.456 - mean_q: -112.908 Interval 8040 (4019500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5446 8 episodes - episode_reward: -227.070 [-320.681, -168.456] - loss: 10.578 - mae: 85.462 - mean_q: -112.921 Interval 8041 (4020000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2832 8 episodes - episode_reward: -203.043 [-315.415, -164.344] - loss: 10.168 - mae: 85.473 - mean_q: -112.951 Interval 8042 (4020500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9206 9 episodes - episode_reward: -166.033 [-223.018, -104.222] - loss: 11.354 - mae: 85.462 - mean_q: -112.983 Interval 8043 (4021000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4520 9 episodes - episode_reward: -192.886 [-320.012, -136.846] - loss: 15.627 - mae: 85.480 - mean_q: -112.985 Interval 8044 (4021500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4093 6 episodes - episode_reward: -194.683 [-256.077, -172.212] - loss: 15.707 - mae: 85.489 - mean_q: -112.934 Interval 8045 (4022000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4907 8 episodes - episode_reward: -155.027 [-194.070, -129.550] - loss: 10.202 - mae: 85.469 - mean_q: -112.914 Interval 8046 (4022500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3492 7 episodes - episode_reward: -169.257 [-222.164, -114.579] - loss: 15.291 - mae: 85.480 - mean_q: -112.915 Interval 8047 (4023000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5962 7 episodes - episode_reward: -181.837 [-219.644, -161.517] - loss: 10.413 - mae: 85.458 - mean_q: -112.923 Interval 8048 (4023500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7564 9 episodes - episode_reward: -155.759 [-313.688, -48.475] - loss: 10.899 - mae: 85.461 - mean_q: -112.940 Interval 8049 (4024000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1747 9 episodes - episode_reward: -180.289 [-241.224, -100.000] - loss: 12.738 - mae: 85.442 - mean_q: -112.932 Interval 8050 (4024500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5055 12 episodes - episode_reward: -146.120 [-239.456, -80.990] - loss: 12.237 - mae: 85.416 - mean_q: -112.915 Interval 8051 (4025000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0040 6 episodes - episode_reward: -230.126 [-334.687, -172.467] - loss: 13.967 - mae: 85.376 - mean_q: -112.912 Interval 8052 (4025500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5542 11 episodes - episode_reward: -167.452 [-228.226, -128.555] - loss: 10.321 - mae: 85.344 - mean_q: -112.901 Interval 8053 (4026000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9423 7 episodes - episode_reward: -203.753 [-345.965, -124.064] - loss: 12.825 - mae: 85.322 - mean_q: -112.898 Interval 8054 (4026500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.1433 3 episodes - episode_reward: -729.979 [-1116.277, -109.923] - loss: 8.061 - mae: 85.277 - mean_q: -112.844 Interval 8055 (4027000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -4.7765 6 episodes - episode_reward: -477.796 [-1494.318, -118.606] - loss: 10.846 - mae: 85.301 - mean_q: -112.872 Interval 8056 (4027500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1363 7 episodes - episode_reward: -218.215 [-344.271, -129.672] - loss: 9.766 - mae: 85.334 - mean_q: -112.890 Interval 8057 (4028000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9177 7 episodes - episode_reward: -208.497 [-268.136, -138.048] - loss: 13.165 - mae: 85.361 - mean_q: -112.895 Interval 8058 (4028500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6395 8 episodes - episode_reward: -170.464 [-251.025, 28.258] - loss: 11.605 - mae: 85.361 - mean_q: -112.890 Interval 8059 (4029000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1772 6 episodes - episode_reward: -177.993 [-269.649, -129.798] - loss: 14.071 - mae: 85.373 - mean_q: -112.890 Interval 8060 (4029500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8579 7 episodes - episode_reward: -190.687 [-278.372, -153.130] - loss: 12.182 - mae: 85.362 - mean_q: -112.913 Interval 8061 (4030000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0983 9 episodes - episode_reward: -184.593 [-272.955, -95.433] - loss: 9.985 - mae: 85.367 - mean_q: -112.922 Interval 8062 (4030500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7406 7 episodes - episode_reward: -195.574 [-244.475, -161.098] - loss: 11.725 - mae: 85.388 - mean_q: -112.943 Interval 8063 (4031000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0412 8 episodes - episode_reward: -191.208 [-257.001, -100.000] - loss: 9.700 - mae: 85.403 - mean_q: -112.948 Interval 8064 (4031500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2322 10 episodes - episode_reward: -160.489 [-252.008, -69.168] - loss: 10.970 - mae: 85.412 - mean_q: -112.951 Interval 8065 (4032000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5548 7 episodes - episode_reward: -178.485 [-233.239, -93.390] - loss: 13.441 - mae: 85.429 - mean_q: -112.953 Interval 8066 (4032500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7593 10 episodes - episode_reward: -140.521 [-174.979, -88.434] - loss: 10.070 - mae: 85.434 - mean_q: -112.958 Interval 8067 (4033000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0910 8 episodes - episode_reward: -184.487 [-233.505, -100.000] - loss: 11.377 - mae: 85.449 - mean_q: -112.972 Interval 8068 (4033500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2282 8 episodes - episode_reward: -207.029 [-313.291, -147.315] - loss: 9.790 - mae: 85.459 - mean_q: -113.006 Interval 8069 (4034000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7602 7 episodes - episode_reward: -196.592 [-254.985, -119.627] - loss: 10.478 - mae: 85.471 - mean_q: -113.033 Interval 8070 (4034500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2934 7 episodes - episode_reward: -166.074 [-228.466, -95.036] - loss: 9.295 - mae: 85.489 - mean_q: -113.055 Interval 8071 (4035000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0608 8 episodes - episode_reward: -185.080 [-281.558, -118.932] - loss: 10.165 - mae: 85.500 - mean_q: -113.081 Interval 8072 (4035500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5961 8 episodes - episode_reward: -188.156 [-273.277, -120.938] - loss: 9.791 - mae: 85.504 - mean_q: -113.095 Interval 8073 (4036000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -5.0778 8 episodes - episode_reward: -364.373 [-671.852, -124.837] - loss: 8.784 - mae: 85.518 - mean_q: -113.129 Interval 8074 (4036500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3866 8 episodes - episode_reward: -145.126 [-203.582, -36.234] - loss: 9.446 - mae: 85.556 - mean_q: -113.148 Interval 8075 (4037000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5805 7 episodes - episode_reward: -176.804 [-234.776, -141.750] - loss: 10.035 - mae: 85.589 - mean_q: -113.162 Interval 8076 (4037500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2075 7 episodes - episode_reward: -159.274 [-192.666, -133.040] - loss: 10.645 - mae: 85.612 - mean_q: -113.150 Interval 8077 (4038000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8730 7 episodes - episode_reward: -211.937 [-277.828, -174.930] - loss: 13.746 - mae: 85.635 - mean_q: -113.140 Interval 8078 (4038500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5175 6 episodes - episode_reward: -202.194 [-267.587, -164.201] - loss: 10.212 - mae: 85.623 - mean_q: -113.147 Interval 8079 (4039000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7999 8 episodes - episode_reward: -172.436 [-225.507, -124.693] - loss: 8.003 - mae: 85.640 - mean_q: -113.160 Interval 8080 (4039500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4105 9 episodes - episode_reward: -198.021 [-259.460, -100.000] - loss: 13.486 - mae: 85.678 - mean_q: -113.183 Interval 8081 (4040000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2058 8 episodes - episode_reward: -190.541 [-270.221, -124.424] - loss: 9.720 - mae: 85.675 - mean_q: -113.166 Interval 8082 (4040500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3409 8 episodes - episode_reward: -146.243 [-222.713, -26.091] - loss: 8.724 - mae: 85.683 - mean_q: -113.172 Interval 8083 (4041000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2833 9 episodes - episode_reward: -185.551 [-252.592, -136.488] - loss: 11.251 - mae: 85.714 - mean_q: -113.177 Interval 8084 (4041500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7698 7 episodes - episode_reward: -192.950 [-254.790, -143.258] - loss: 9.876 - mae: 85.722 - mean_q: -113.194 Interval 8085 (4042000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7083 8 episodes - episode_reward: -177.263 [-277.088, -126.272] - loss: 8.967 - mae: 85.727 - mean_q: -113.218 Interval 8086 (4042500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0986 8 episodes - episode_reward: -130.997 [-222.132, 37.487] - loss: 8.246 - mae: 85.750 - mean_q: -113.247 Interval 8087 (4043000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6680 8 episodes - episode_reward: -156.849 [-184.502, -129.137] - loss: 10.903 - mae: 85.784 - mean_q: -113.251 Interval 8088 (4043500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2196 8 episodes - episode_reward: -209.424 [-319.033, -145.982] - loss: 11.712 - mae: 85.801 - mean_q: -113.241 Interval 8089 (4044000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9375 7 episodes - episode_reward: -123.276 [-191.459, 4.845] - loss: 11.550 - mae: 85.814 - mean_q: -113.247 Interval 8090 (4044500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2955 8 episodes - episode_reward: -203.188 [-245.723, -163.586] - loss: 10.506 - mae: 85.822 - mean_q: -113.223 Interval 8091 (4045000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4983 8 episodes - episode_reward: -174.277 [-255.634, -100.000] - loss: 12.439 - mae: 85.844 - mean_q: -113.202 Interval 8092 (4045500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0612 9 episodes - episode_reward: -160.972 [-220.565, -68.903] - loss: 10.082 - mae: 85.824 - mean_q: -113.206 Interval 8093 (4046000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9428 8 episodes - episode_reward: -189.515 [-278.792, -100.000] - loss: 11.685 - mae: 85.834 - mean_q: -113.190 Interval 8094 (4046500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3922 7 episodes - episode_reward: -169.078 [-226.065, -133.994] - loss: 10.581 - mae: 85.835 - mean_q: -113.168 Interval 8095 (4047000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4369 7 episodes - episode_reward: -171.679 [-205.884, -107.681] - loss: 10.737 - mae: 85.841 - mean_q: -113.169 Interval 8096 (4047500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4675 8 episodes - episode_reward: -158.506 [-233.999, 29.440] - loss: 9.916 - mae: 85.832 - mean_q: -113.152 Interval 8097 (4048000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6257 12 episodes - episode_reward: -156.163 [-269.876, -100.000] - loss: 10.209 - mae: 85.837 - mean_q: -113.133 Interval 8098 (4048500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1844 6 episodes - episode_reward: -169.879 [-210.425, -130.279] - loss: 8.195 - mae: 85.812 - mean_q: -113.142 Interval 8099 (4049000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2959 8 episodes - episode_reward: -209.537 [-302.121, -144.024] - loss: 11.259 - mae: 85.824 - mean_q: -113.146 Interval 8100 (4049500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7627 7 episodes - episode_reward: -196.645 [-309.244, -121.395] - loss: 10.825 - mae: 85.827 - mean_q: -113.127 Interval 8101 (4050000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3878 7 episodes - episode_reward: -173.192 [-244.530, -135.375] - loss: 11.331 - mae: 85.825 - mean_q: -113.111 Interval 8102 (4050500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8688 7 episodes - episode_reward: -196.593 [-304.589, -127.245] - loss: 9.658 - mae: 85.807 - mean_q: -113.105 Interval 8103 (4051000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9684 7 episodes - episode_reward: -225.465 [-292.285, -157.227] - loss: 11.492 - mae: 85.814 - mean_q: -113.103 Interval 8104 (4051500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8583 7 episodes - episode_reward: -198.227 [-296.251, -108.460] - loss: 11.854 - mae: 85.813 - mean_q: -113.072 Interval 8105 (4052000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7989 7 episodes - episode_reward: -192.406 [-232.366, -125.129] - loss: 9.912 - mae: 85.806 - mean_q: -113.059 Interval 8106 (4052500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4191 10 episodes - episode_reward: -179.193 [-250.236, -117.358] - loss: 8.251 - mae: 85.807 - mean_q: -113.059 Interval 8107 (4053000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4457 9 episodes - episode_reward: -191.457 [-287.661, -113.443] - loss: 10.912 - mae: 85.811 - mean_q: -113.046 Interval 8108 (4053500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1159 7 episodes - episode_reward: -144.311 [-221.795, -51.653] - loss: 13.553 - mae: 85.817 - mean_q: -113.016 Interval 8109 (4054000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.6389 8 episodes - episode_reward: -222.345 [-485.839, -135.809] - loss: 7.124 - mae: 85.777 - mean_q: -113.005 Interval 8110 (4054500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3652 6 episodes - episode_reward: -195.447 [-293.387, -73.873] - loss: 11.038 - mae: 85.791 - mean_q: -113.013 Interval 8111 (4055000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7750 9 episodes - episode_reward: -158.688 [-243.178, 45.373] - loss: 9.442 - mae: 85.770 - mean_q: -113.013 Interval 8112 (4055500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8759 8 episodes - episode_reward: -180.158 [-243.513, -139.920] - loss: 10.704 - mae: 85.770 - mean_q: -113.000 Interval 8113 (4056000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1158 8 episodes - episode_reward: -191.379 [-267.044, -115.884] - loss: 11.667 - mae: 85.774 - mean_q: -112.987 Interval 8114 (4056500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3997 7 episodes - episode_reward: -160.452 [-195.416, -100.000] - loss: 12.406 - mae: 85.772 - mean_q: -112.970 Interval 8115 (4057000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0512 9 episodes - episode_reward: -185.274 [-286.341, -136.507] - loss: 10.565 - mae: 85.754 - mean_q: -112.948 Interval 8116 (4057500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7551 6 episodes - episode_reward: -218.849 [-269.109, -175.739] - loss: 8.483 - mae: 85.735 - mean_q: -112.926 Interval 8117 (4058000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8673 7 episodes - episode_reward: -206.986 [-281.626, -155.552] - loss: 10.588 - mae: 85.726 - mean_q: -112.925 Interval 8118 (4058500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4291 9 episodes - episode_reward: -141.090 [-260.393, -7.017] - loss: 12.304 - mae: 85.714 - mean_q: -112.915 Interval 8119 (4059000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7379 7 episodes - episode_reward: -196.289 [-237.713, -169.809] - loss: 8.180 - mae: 85.662 - mean_q: -112.878 Interval 8120 (4059500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0545 6 episodes - episode_reward: -161.784 [-246.239, -52.810] - loss: 12.248 - mae: 85.659 - mean_q: -112.881 Interval 8121 (4060000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2575 7 episodes - episode_reward: -163.458 [-220.987, -116.009] - loss: 11.635 - mae: 85.624 - mean_q: -112.842 Interval 8122 (4060500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2923 9 episodes - episode_reward: -178.461 [-236.997, -124.455] - loss: 10.587 - mae: 85.593 - mean_q: -112.844 Interval 8123 (4061000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2703 8 episodes - episode_reward: -213.659 [-396.011, -123.670] - loss: 9.154 - mae: 85.584 - mean_q: -112.807 Interval 8124 (4061500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8753 8 episodes - episode_reward: -181.351 [-261.440, -101.210] - loss: 11.991 - mae: 85.577 - mean_q: -112.795 Interval 8125 (4062000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9554 6 episodes - episode_reward: -238.014 [-385.834, -195.118] - loss: 11.096 - mae: 85.548 - mean_q: -112.781 Interval 8126 (4062500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5876 6 episodes - episode_reward: -209.443 [-300.114, -125.765] - loss: 12.402 - mae: 85.535 - mean_q: -112.780 Interval 8127 (4063000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0527 8 episodes - episode_reward: -191.846 [-289.501, -150.011] - loss: 10.837 - mae: 85.520 - mean_q: -112.782 Interval 8128 (4063500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1211 10 episodes - episode_reward: -159.606 [-246.887, -109.974] - loss: 7.988 - mae: 85.496 - mean_q: -112.768 Interval 8129 (4064000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1036 7 episodes - episode_reward: -215.899 [-300.627, -142.263] - loss: 9.111 - mae: 85.488 - mean_q: -112.789 Interval 8130 (4064500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5546 7 episodes - episode_reward: -187.440 [-309.414, -95.269] - loss: 9.127 - mae: 85.482 - mean_q: -112.792 Interval 8131 (4065000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -0.5407 5 episodes - episode_reward: -55.270 [-241.044, 297.095] - loss: 10.800 - mae: 85.466 - mean_q: -112.783 Interval 8132 (4065500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5708 7 episodes - episode_reward: -178.423 [-319.569, -104.335] - loss: 14.726 - mae: 85.476 - mean_q: -112.753 Interval 8133 (4066000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3556 10 episodes - episode_reward: -171.134 [-228.266, -130.637] - loss: 12.706 - mae: 85.471 - mean_q: -112.695 Interval 8134 (4066500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9652 8 episodes - episode_reward: -183.436 [-213.322, -116.294] - loss: 9.581 - mae: 85.425 - mean_q: -112.699 Interval 8135 (4067000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5902 10 episodes - episode_reward: -134.873 [-210.819, 64.539] - loss: 9.470 - mae: 85.404 - mean_q: -112.724 Interval 8136 (4067500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8735 7 episodes - episode_reward: -193.420 [-257.625, -147.434] - loss: 19.811 - mae: 85.437 - mean_q: -112.691 Interval 8137 (4068000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7693 8 episodes - episode_reward: -183.403 [-253.669, -127.160] - loss: 11.579 - mae: 85.401 - mean_q: -112.644 Interval 8138 (4068500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1899 6 episodes - episode_reward: -179.216 [-268.643, -109.436] - loss: 12.369 - mae: 85.386 - mean_q: -112.619 Interval 8139 (4069000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7731 6 episodes - episode_reward: -221.416 [-368.232, -119.499] - loss: 13.947 - mae: 85.403 - mean_q: -112.582 Interval 8140 (4069500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2201 7 episodes - episode_reward: -169.901 [-265.277, -117.462] - loss: 12.466 - mae: 85.372 - mean_q: -112.548 Interval 8141 (4070000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8248 8 episodes - episode_reward: -176.002 [-216.845, -141.445] - loss: 10.452 - mae: 85.348 - mean_q: -112.533 Interval 8142 (4070500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3851 8 episodes - episode_reward: -146.467 [-296.935, -14.521] - loss: 10.660 - mae: 85.312 - mean_q: -112.511 Interval 8143 (4071000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3996 9 episodes - episode_reward: -187.239 [-284.654, -124.186] - loss: 11.969 - mae: 85.301 - mean_q: -112.506 Interval 8144 (4071500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8048 8 episodes - episode_reward: -180.103 [-273.132, -108.149] - loss: 10.920 - mae: 85.280 - mean_q: -112.491 Interval 8145 (4072000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.8622 10 episodes - episode_reward: -193.875 [-282.507, -131.652] - loss: 12.009 - mae: 85.280 - mean_q: -112.461 Interval 8146 (4072500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5627 6 episodes - episode_reward: -203.779 [-249.544, -116.095] - loss: 15.339 - mae: 85.267 - mean_q: -112.409 Interval 8147 (4073000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7344 7 episodes - episode_reward: -189.918 [-247.438, -36.843] - loss: 14.359 - mae: 85.242 - mean_q: -112.379 Interval 8148 (4073500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5916 6 episodes - episode_reward: -210.010 [-288.198, -161.150] - loss: 10.242 - mae: 85.216 - mean_q: -112.370 Interval 8149 (4074000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9241 10 episodes - episode_reward: -207.838 [-494.284, -100.000] - loss: 9.332 - mae: 85.195 - mean_q: -112.368 Interval 8150 (4074500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9534 9 episodes - episode_reward: -166.256 [-217.413, -89.418] - loss: 11.300 - mae: 85.191 - mean_q: -112.385 Interval 8151 (4075000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8709 6 episodes - episode_reward: -143.186 [-216.183, 6.001] - loss: 14.896 - mae: 85.184 - mean_q: -112.347 Interval 8152 (4075500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9095 7 episodes - episode_reward: -219.558 [-343.570, -149.282] - loss: 14.552 - mae: 85.166 - mean_q: -112.308 Interval 8153 (4076000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9579 7 episodes - episode_reward: -207.150 [-284.682, -124.891] - loss: 14.275 - mae: 85.139 - mean_q: -112.286 Interval 8154 (4076500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3875 9 episodes - episode_reward: -187.195 [-381.295, -30.927] - loss: 11.666 - mae: 85.086 - mean_q: -112.247 Interval 8155 (4077000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0713 7 episodes - episode_reward: -215.636 [-256.840, -190.026] - loss: 10.921 - mae: 85.045 - mean_q: -112.241 Interval 8156 (4077500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5670 7 episodes - episode_reward: -173.946 [-244.380, -118.190] - loss: 11.327 - mae: 85.005 - mean_q: -112.238 Interval 8157 (4078000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6045 7 episodes - episode_reward: -202.903 [-285.233, -153.745] - loss: 11.987 - mae: 84.977 - mean_q: -112.228 Interval 8158 (4078500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1132 8 episodes - episode_reward: -192.400 [-248.785, -100.000] - loss: 11.504 - mae: 84.927 - mean_q: -112.233 Interval 8159 (4079000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4226 7 episodes - episode_reward: -170.165 [-210.379, -121.089] - loss: 13.655 - mae: 84.902 - mean_q: -112.231 Interval 8160 (4079500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9968 8 episodes - episode_reward: -182.537 [-451.357, 28.799] - loss: 9.718 - mae: 84.858 - mean_q: -112.229 Interval 8161 (4080000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5143 4 episodes - episode_reward: -504.032 [-725.178, -306.808] - loss: 9.743 - mae: 84.842 - mean_q: -112.202 Interval 8162 (4080500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7608 8 episodes - episode_reward: -329.617 [-1471.696, -122.130] - loss: 8.815 - mae: 84.845 - mean_q: -112.236 Interval 8163 (4081000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6024 7 episodes - episode_reward: -191.685 [-203.327, -157.783] - loss: 16.590 - mae: 84.896 - mean_q: -112.235 Interval 8164 (4081500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6347 7 episodes - episode_reward: -190.187 [-226.191, -120.130] - loss: 11.168 - mae: 84.886 - mean_q: -112.210 Interval 8165 (4082000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8672 8 episodes - episode_reward: -175.424 [-286.395, 42.182] - loss: 14.600 - mae: 84.901 - mean_q: -112.223 Interval 8166 (4082500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0372 9 episodes - episode_reward: -169.969 [-208.301, -136.052] - loss: 10.958 - mae: 84.895 - mean_q: -112.192 Interval 8167 (4083000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5080 8 episodes - episode_reward: -160.497 [-272.203, 21.925] - loss: 10.633 - mae: 84.888 - mean_q: -112.187 Interval 8168 (4083500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7817 7 episodes - episode_reward: -196.702 [-269.276, -142.527] - loss: 15.676 - mae: 84.912 - mean_q: -112.189 Interval 8169 (4084000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8928 8 episodes - episode_reward: -182.886 [-214.719, -100.000] - loss: 11.875 - mae: 84.903 - mean_q: -112.162 Interval 8170 (4084500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4074 8 episodes - episode_reward: -211.746 [-294.314, -148.998] - loss: 11.557 - mae: 84.928 - mean_q: -112.159 Interval 8171 (4085000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3448 7 episodes - episode_reward: -159.447 [-266.331, -61.022] - loss: 10.317 - mae: 84.919 - mean_q: -112.153 Interval 8172 (4085500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.7558 10 episodes - episode_reward: -183.139 [-273.722, -87.876] - loss: 9.930 - mae: 84.902 - mean_q: -112.171 Interval 8173 (4086000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9513 9 episodes - episode_reward: -172.857 [-290.465, -41.351] - loss: 13.436 - mae: 84.893 - mean_q: -112.155 Interval 8174 (4086500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7538 7 episodes - episode_reward: -191.061 [-322.660, -100.000] - loss: 11.605 - mae: 84.860 - mean_q: -112.179 Interval 8175 (4087000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5006 7 episodes - episode_reward: -184.842 [-256.776, -119.437] - loss: 14.538 - mae: 84.841 - mean_q: -112.188 Interval 8176 (4087500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0422 8 episodes - episode_reward: -190.248 [-276.263, -137.808] - loss: 13.297 - mae: 84.830 - mean_q: -112.177 Interval 8177 (4088000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8596 8 episodes - episode_reward: -172.843 [-191.144, -147.185] - loss: 13.695 - mae: 84.827 - mean_q: -112.163 Interval 8178 (4088500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5121 7 episodes - episode_reward: -179.081 [-249.021, -64.190] - loss: 12.619 - mae: 84.787 - mean_q: -112.144 Interval 8179 (4089000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3360 7 episodes - episode_reward: -164.383 [-252.219, -32.339] - loss: 12.319 - mae: 84.772 - mean_q: -112.155 Interval 8180 (4089500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6677 7 episodes - episode_reward: -190.906 [-317.099, -39.509] - loss: 13.079 - mae: 84.780 - mean_q: -112.154 Interval 8181 (4090000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5428 8 episodes - episode_reward: -163.552 [-218.630, -106.676] - loss: 11.359 - mae: 84.773 - mean_q: -112.156 Interval 8182 (4090500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.9336 8 episodes - episode_reward: -243.576 [-562.530, -109.276] - loss: 13.056 - mae: 84.762 - mean_q: -112.165 Interval 8183 (4091000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.9337 9 episodes - episode_reward: -335.568 [-799.520, -100.000] - loss: 10.720 - mae: 84.749 - mean_q: -112.180 Interval 8184 (4091500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9149 7 episodes - episode_reward: -206.773 [-346.837, -156.208] - loss: 12.506 - mae: 84.770 - mean_q: -112.185 Interval 8185 (4092000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9716 9 episodes - episode_reward: -165.641 [-282.096, -57.635] - loss: 12.630 - mae: 84.773 - mean_q: -112.190 Interval 8186 (4092500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7444 7 episodes - episode_reward: -195.092 [-222.609, -149.868] - loss: 11.628 - mae: 84.776 - mean_q: -112.213 Interval 8187 (4093000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7461 8 episodes - episode_reward: -172.776 [-254.138, -117.608] - loss: 12.003 - mae: 84.785 - mean_q: -112.231 Interval 8188 (4093500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3246 6 episodes - episode_reward: -184.398 [-238.317, -103.984] - loss: 12.705 - mae: 84.784 - mean_q: -112.248 Interval 8189 (4094000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7233 8 episodes - episode_reward: -170.578 [-257.820, -98.698] - loss: 11.078 - mae: 84.792 - mean_q: -112.278 Interval 8190 (4094500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6328 9 episodes - episode_reward: -153.959 [-177.526, -100.000] - loss: 15.501 - mae: 84.820 - mean_q: -112.289 Interval 8191 (4095000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2954 6 episodes - episode_reward: -190.248 [-229.160, -144.297] - loss: 12.053 - mae: 84.811 - mean_q: -112.292 Interval 8192 (4095500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4725 9 episodes - episode_reward: -195.257 [-277.395, -73.771] - loss: 12.081 - mae: 84.825 - mean_q: -112.303 Interval 8193 (4096000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7880 12 episodes - episode_reward: -153.892 [-238.682, -34.487] - loss: 14.348 - mae: 84.832 - mean_q: -112.320 Interval 8194 (4096500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5347 8 episodes - episode_reward: -161.879 [-275.282, -55.142] - loss: 12.674 - mae: 84.825 - mean_q: -112.333 Interval 8195 (4097000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9201 7 episodes - episode_reward: -137.676 [-184.447, -23.332] - loss: 14.887 - mae: 84.841 - mean_q: -112.320 Interval 8196 (4097500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8270 9 episodes - episode_reward: -153.947 [-223.447, -100.000] - loss: 14.716 - mae: 84.841 - mean_q: -112.316 Interval 8197 (4098000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2095 6 episodes - episode_reward: -178.149 [-275.619, -18.006] - loss: 12.275 - mae: 84.848 - mean_q: -112.303 Interval 8198 (4098500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6935 7 episodes - episode_reward: -199.999 [-245.754, -153.331] - loss: 17.383 - mae: 84.889 - mean_q: -112.311 Interval 8199 (4099000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5354 7 episodes - episode_reward: -175.947 [-229.818, -118.731] - loss: 11.941 - mae: 84.872 - mean_q: -112.310 Interval 8200 (4099500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5866 7 episodes - episode_reward: -182.523 [-285.464, -100.000] - loss: 14.770 - mae: 84.896 - mean_q: -112.322 Interval 8201 (4100000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3048 9 episodes - episode_reward: -184.808 [-356.267, -100.000] - loss: 11.879 - mae: 84.888 - mean_q: -112.331 Interval 8202 (4100500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1754 9 episodes - episode_reward: -179.615 [-277.664, -89.942] - loss: 13.793 - mae: 84.900 - mean_q: -112.340 Interval 8203 (4101000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4436 7 episodes - episode_reward: -168.530 [-274.081, -55.395] - loss: 15.603 - mae: 84.918 - mean_q: -112.331 Interval 8204 (4101500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2155 7 episodes - episode_reward: -158.888 [-277.344, 64.435] - loss: 13.497 - mae: 84.917 - mean_q: -112.324 Interval 8205 (4102000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3732 8 episodes - episode_reward: -154.751 [-188.652, -113.325] - loss: 12.877 - mae: 84.917 - mean_q: -112.343 Interval 8206 (4102500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9537 8 episodes - episode_reward: -182.032 [-276.909, -61.379] - loss: 10.248 - mae: 84.906 - mean_q: -112.340 Interval 8207 (4103000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3980 9 episodes - episode_reward: -136.035 [-301.501, 30.623] - loss: 14.092 - mae: 84.956 - mean_q: -112.350 Interval 8208 (4103500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2673 7 episodes - episode_reward: -161.284 [-352.924, 51.984] - loss: 19.061 - mae: 84.987 - mean_q: -112.333 Interval 8209 (4104000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3576 10 episodes - episode_reward: -159.760 [-219.507, -102.010] - loss: 12.484 - mae: 84.964 - mean_q: -112.322 Interval 8210 (4104500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4808 7 episodes - episode_reward: -185.748 [-289.615, -127.584] - loss: 16.509 - mae: 84.978 - mean_q: -112.291 Interval 8211 (4105000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2441 8 episodes - episode_reward: -210.283 [-260.815, -161.970] - loss: 14.966 - mae: 84.964 - mean_q: -112.296 Interval 8212 (4105500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9018 9 episodes - episode_reward: -154.377 [-241.999, -95.620] - loss: 12.534 - mae: 84.941 - mean_q: -112.281 Interval 8213 (4106000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0797 8 episodes - episode_reward: -189.692 [-238.478, -94.644] - loss: 18.846 - mae: 84.990 - mean_q: -112.259 Interval 8214 (4106500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4429 6 episodes - episode_reward: -202.455 [-293.225, -140.037] - loss: 15.596 - mae: 84.962 - mean_q: -112.257 Interval 8215 (4107000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8595 8 episodes - episode_reward: -183.758 [-249.046, -113.568] - loss: 17.167 - mae: 84.975 - mean_q: -112.236 Interval 8216 (4107500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5646 7 episodes - episode_reward: -179.059 [-223.228, -143.398] - loss: 17.778 - mae: 84.968 - mean_q: -112.182 Interval 8217 (4108000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0287 8 episodes - episode_reward: -182.039 [-216.294, -156.598] - loss: 14.409 - mae: 84.948 - mean_q: -112.163 Interval 8218 (4108500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7602 7 episodes - episode_reward: -198.211 [-261.867, -129.119] - loss: 13.829 - mae: 84.938 - mean_q: -112.167 Interval 8219 (4109000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6063 8 episodes - episode_reward: -174.130 [-216.112, -125.580] - loss: 20.204 - mae: 84.949 - mean_q: -112.155 Interval 8220 (4109500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4784 10 episodes - episode_reward: -174.739 [-264.482, -117.041] - loss: 14.536 - mae: 84.923 - mean_q: -112.124 Interval 8221 (4110000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0092 8 episodes - episode_reward: -187.541 [-362.289, -91.359] - loss: 11.404 - mae: 84.908 - mean_q: -112.118 Interval 8222 (4110500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3386 8 episodes - episode_reward: -201.915 [-409.687, -118.798] - loss: 16.197 - mae: 84.940 - mean_q: -112.112 Interval 8223 (4111000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1477 8 episodes - episode_reward: -199.693 [-277.329, -129.918] - loss: 16.237 - mae: 84.927 - mean_q: -112.103 Interval 8224 (4111500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2439 9 episodes - episode_reward: -187.714 [-253.197, -100.000] - loss: 14.572 - mae: 84.900 - mean_q: -112.074 Interval 8225 (4112000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0270 8 episodes - episode_reward: -181.383 [-230.768, -131.689] - loss: 15.748 - mae: 84.902 - mean_q: -112.072 Interval 8226 (4112500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4811 8 episodes - episode_reward: -155.335 [-174.739, -140.432] - loss: 15.100 - mae: 84.886 - mean_q: -112.058 Interval 8227 (4113000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8809 7 episodes - episode_reward: -206.775 [-252.987, -183.397] - loss: 18.107 - mae: 84.858 - mean_q: -112.040 Interval 8228 (4113500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0204 7 episodes - episode_reward: -216.319 [-301.712, -100.000] - loss: 12.088 - mae: 84.818 - mean_q: -112.041 Interval 8229 (4114000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8422 8 episodes - episode_reward: -171.038 [-231.489, -113.107] - loss: 14.688 - mae: 84.817 - mean_q: -112.017 Interval 8230 (4114500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4406 6 episodes - episode_reward: -206.079 [-306.370, -147.474] - loss: 14.429 - mae: 84.786 - mean_q: -112.005 Interval 8231 (4115000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0082 7 episodes - episode_reward: -214.062 [-322.049, -150.872] - loss: 14.714 - mae: 84.778 - mean_q: -111.999 Interval 8232 (4115500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0552 7 episodes - episode_reward: -153.445 [-228.166, -61.560] - loss: 18.588 - mae: 84.783 - mean_q: -111.990 Interval 8233 (4116000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2014 7 episodes - episode_reward: -156.016 [-281.986, -31.691] - loss: 12.378 - mae: 84.757 - mean_q: -111.949 Interval 8234 (4116500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6600 8 episodes - episode_reward: -162.040 [-259.455, -100.000] - loss: 13.064 - mae: 84.742 - mean_q: -111.957 Interval 8235 (4117000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7782 7 episodes - episode_reward: -201.629 [-256.924, -133.323] - loss: 12.878 - mae: 84.738 - mean_q: -111.959 Interval 8236 (4117500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3774 9 episodes - episode_reward: -189.387 [-264.498, -123.291] - loss: 13.967 - mae: 84.733 - mean_q: -111.979 Interval 8237 (4118000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7024 10 episodes - episode_reward: -179.204 [-250.375, -107.624] - loss: 12.153 - mae: 84.714 - mean_q: -111.993 Interval 8238 (4118500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9741 8 episodes - episode_reward: -184.858 [-229.327, -151.971] - loss: 17.965 - mae: 84.721 - mean_q: -111.992 Interval 8239 (4119000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6244 8 episodes - episode_reward: -168.590 [-249.723, -101.070] - loss: 14.155 - mae: 84.715 - mean_q: -111.970 Interval 8240 (4119500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8928 8 episodes - episode_reward: -186.303 [-262.537, -100.000] - loss: 12.134 - mae: 84.687 - mean_q: -111.951 Interval 8241 (4120000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9756 8 episodes - episode_reward: -181.951 [-217.541, -103.180] - loss: 12.471 - mae: 84.687 - mean_q: -111.952 Interval 8242 (4120500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9910 9 episodes - episode_reward: -168.916 [-243.571, -103.125] - loss: 11.249 - mae: 84.670 - mean_q: -111.961 Interval 8243 (4121000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7646 6 episodes - episode_reward: -228.663 [-401.875, -185.264] - loss: 12.810 - mae: 84.687 - mean_q: -111.978 Interval 8244 (4121500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.5083 8 episodes - episode_reward: -218.512 [-407.215, -157.939] - loss: 16.360 - mae: 84.707 - mean_q: -111.971 Interval 8245 (4122000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8815 7 episodes - episode_reward: -199.054 [-353.430, -127.946] - loss: 13.063 - mae: 84.701 - mean_q: -111.964 Interval 8246 (4122500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3568 7 episodes - episode_reward: -163.494 [-277.741, 7.821] - loss: 9.768 - mae: 84.691 - mean_q: -111.969 Interval 8247 (4123000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7418 7 episodes - episode_reward: -215.947 [-295.860, -175.659] - loss: 10.118 - mae: 84.694 - mean_q: -111.989 Interval 8248 (4123500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3601 9 episodes - episode_reward: -179.774 [-214.639, -151.778] - loss: 11.596 - mae: 84.698 - mean_q: -112.006 Interval 8249 (4124000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1449 7 episodes - episode_reward: -156.058 [-354.959, 35.047] - loss: 12.163 - mae: 84.725 - mean_q: -112.019 Interval 8250 (4124500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6513 8 episodes - episode_reward: -167.743 [-245.854, -130.233] - loss: 15.956 - mae: 84.744 - mean_q: -112.000 Interval 8251 (4125000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1911 8 episodes - episode_reward: -198.815 [-463.334, -120.941] - loss: 13.354 - mae: 84.713 - mean_q: -112.006 Interval 8252 (4125500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2452 7 episodes - episode_reward: -159.117 [-284.137, -68.431] - loss: 11.381 - mae: 84.717 - mean_q: -111.997 Interval 8253 (4126000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7367 7 episodes - episode_reward: -185.499 [-320.520, -134.760] - loss: 10.632 - mae: 84.696 - mean_q: -112.008 Interval 8254 (4126500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1837 9 episodes - episode_reward: -183.924 [-280.314, -100.000] - loss: 14.124 - mae: 84.710 - mean_q: -112.015 Interval 8255 (4127000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4709 6 episodes - episode_reward: -198.886 [-333.780, -129.126] - loss: 12.159 - mae: 84.697 - mean_q: -112.013 Interval 8256 (4127500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7684: 0s 9 episodes - episode_reward: -160.074 [-322.991, 6.721] - loss: 11.948 - mae: 84.700 - mean_q: -112.044 Interval 8257 (4128000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8773 7 episodes - episode_reward: -194.518 [-245.243, -144.148] - loss: 14.123 - mae: 84.703 - mean_q: -112.045 Interval 8258 (4128500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0085 7 episodes - episode_reward: -218.429 [-356.764, -166.988] - loss: 14.283 - mae: 84.715 - mean_q: -112.034 Interval 8259 (4129000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4047 9 episodes - episode_reward: -188.853 [-270.588, -129.766] - loss: 13.823 - mae: 84.722 - mean_q: -112.035 Interval 8260 (4129500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5045 11 episodes - episode_reward: -163.577 [-213.673, -100.000] - loss: 12.780 - mae: 84.712 - mean_q: -112.038 Interval 8261 (4130000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0281 9 episodes - episode_reward: -164.402 [-237.949, -52.782] - loss: 16.075 - mae: 84.697 - mean_q: -112.037 Interval 8262 (4130500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2416 10 episodes - episode_reward: -166.107 [-258.214, -45.040] - loss: 16.777 - mae: 84.669 - mean_q: -112.011 Interval 8263 (4131000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4585 10 episodes - episode_reward: -170.966 [-245.047, -49.572] - loss: 16.220 - mae: 84.641 - mean_q: -112.014 Interval 8264 (4131500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5262 8 episodes - episode_reward: -224.589 [-333.452, -148.289] - loss: 16.353 - mae: 84.604 - mean_q: -112.005 Interval 8265 (4132000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4485 5 episodes - episode_reward: -423.981 [-530.247, -317.933] - loss: 11.330 - mae: 84.527 - mean_q: -111.926 Interval 8266 (4132500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.1357 5 episodes - episode_reward: -635.315 [-1213.162, -247.683] - loss: 15.333 - mae: 84.557 - mean_q: -111.918 Interval 8267 (4133000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4977 9 episodes - episode_reward: -189.083 [-380.983, -100.000] - loss: 13.542 - mae: 84.582 - mean_q: -111.987 Interval 8268 (4133500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7204 8 episodes - episode_reward: -171.386 [-196.429, -152.394] - loss: 14.212 - mae: 84.611 - mean_q: -111.996 Interval 8269 (4134000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3938 8 episodes - episode_reward: -211.584 [-310.419, -155.211] - loss: 13.739 - mae: 84.633 - mean_q: -111.993 Interval 8270 (4134500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4738 8 episodes - episode_reward: -157.484 [-196.411, -116.704] - loss: 10.296 - mae: 84.636 - mean_q: -112.022 Interval 8271 (4135000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -2.6713 7 episodes - episode_reward: -183.863 [-211.209, -151.972] - loss: 10.032 - mae: 84.667 - mean_q: -112.056 Interval 8272 (4135500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7398 8 episodes - episode_reward: -234.310 [-407.539, -119.404] - loss: 17.465 - mae: 84.719 - mean_q: -112.063 Interval 8273 (4136000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7414 7 episodes - episode_reward: -184.779 [-307.074, -144.352] - loss: 13.726 - mae: 84.721 - mean_q: -112.061 Interval 8274 (4136500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1435 8 episodes - episode_reward: -202.044 [-276.535, -158.215] - loss: 11.775 - mae: 84.734 - mean_q: -112.074 Interval 8275 (4137000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4844 7 episodes - episode_reward: -183.507 [-255.102, -112.060] - loss: 16.736 - mae: 84.758 - mean_q: -112.070 Interval 8276 (4137500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6152 8 episodes - episode_reward: -161.486 [-204.246, -100.000] - loss: 10.129 - mae: 84.763 - mean_q: -112.058 Interval 8277 (4138000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9823 7 episodes - episode_reward: -209.263 [-400.169, -162.333] - loss: 14.352 - mae: 84.787 - mean_q: -112.104 Interval 8278 (4138500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6525 9 episodes - episode_reward: -155.867 [-190.005, -122.865] - loss: 13.951 - mae: 84.791 - mean_q: -112.120 Interval 8279 (4139000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8903 8 episodes - episode_reward: -176.361 [-242.722, -93.325] - loss: 13.181 - mae: 84.798 - mean_q: -112.120 Interval 8280 (4139500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.1275 9 episodes - episode_reward: -166.800 [-227.008, -75.396] - loss: 15.213 - mae: 84.816 - mean_q: -112.107 Interval 8281 (4140000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8960 9 episodes - episode_reward: -168.906 [-205.614, -130.517] - loss: 13.834 - mae: 84.819 - mean_q: -112.123 Interval 8282 (4140500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9287 7 episodes - episode_reward: -198.078 [-246.826, -139.069] - loss: 13.109 - mae: 84.807 - mean_q: -112.129 Interval 8283 (4141000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8883 8 episodes - episode_reward: -192.860 [-298.873, -129.159] - loss: 14.059 - mae: 84.808 - mean_q: -112.136 Interval 8284 (4141500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5021 9 episodes - episode_reward: -193.933 [-320.394, -135.519] - loss: 10.480 - mae: 84.801 - mean_q: -112.147 Interval 8285 (4142000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1643 7 episodes - episode_reward: -224.874 [-276.200, -178.957] - loss: 15.960 - mae: 84.810 - mean_q: -112.151 Interval 8286 (4142500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3776 7 episodes - episode_reward: -167.645 [-233.364, -141.936] - loss: 10.792 - mae: 84.793 - mean_q: -112.164 Interval 8287 (4143000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1392 7 episodes - episode_reward: -161.336 [-226.697, -113.668] - loss: 9.234 - mae: 84.798 - mean_q: -112.193 Interval 8288 (4143500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7631 6 episodes - episode_reward: -209.466 [-334.594, -118.025] - loss: 15.483 - mae: 84.845 - mean_q: -112.197 Interval 8289 (4144000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2853 9 episodes - episode_reward: -193.557 [-283.925, -147.290] - loss: 11.464 - mae: 84.825 - mean_q: -112.195 Interval 8290 (4144500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1254 7 episodes - episode_reward: -224.755 [-347.835, -148.835] - loss: 12.222 - mae: 84.812 - mean_q: -112.209 Interval 8291 (4145000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4545 6 episodes - episode_reward: -181.921 [-246.580, -141.930] - loss: 15.303 - mae: 84.823 - mean_q: -112.198 Interval 8292 (4145500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8859 9 episodes - episode_reward: -169.057 [-268.780, -106.331] - loss: 16.764 - mae: 84.824 - mean_q: -112.198 Interval 8293 (4146000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4196 8 episodes - episode_reward: -153.862 [-218.855, 30.986] - loss: 13.893 - mae: 84.808 - mean_q: -112.174 Interval 8294 (4146500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5611 7 episodes - episode_reward: -178.761 [-286.153, -127.452] - loss: 14.421 - mae: 84.818 - mean_q: -112.178 Interval 8295 (4147000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2719 9 episodes - episode_reward: -180.686 [-235.373, -100.000] - loss: 11.647 - mae: 84.811 - mean_q: -112.163 Interval 8296 (4147500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9231 7 episodes - episode_reward: -203.258 [-265.696, -151.116] - loss: 12.388 - mae: 84.818 - mean_q: -112.191 Interval 8297 (4148000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8503 8 episodes - episode_reward: -184.402 [-236.700, -123.702] - loss: 16.517 - mae: 84.835 - mean_q: -112.191 Interval 8298 (4148500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6179 9 episodes - episode_reward: -145.775 [-236.122, -36.325] - loss: 11.867 - mae: 84.804 - mean_q: -112.201 Interval 8299 (4149000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9390 9 episodes - episode_reward: -160.611 [-245.886, -100.000] - loss: 13.709 - mae: 84.823 - mean_q: -112.206 Interval 8300 (4149500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1023 7 episodes - episode_reward: -235.153 [-373.351, -160.649] - loss: 11.012 - mae: 84.804 - mean_q: -112.221 Interval 8301 (4150000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2601 9 episodes - episode_reward: -174.980 [-226.860, -100.000] - loss: 11.014 - mae: 84.810 - mean_q: -112.249 Interval 8302 (4150500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6085 6 episodes - episode_reward: -219.984 [-249.910, -205.318] - loss: 11.334 - mae: 84.795 - mean_q: -112.283 Interval 8303 (4151000 steps performed) 500/500 [==============================] - ETA: 0s - reward: -3.69 - 2s 4ms/step - reward: -3.6578 9 episodes - episode_reward: -197.000 [-499.999, -100.000] - loss: 13.638 - mae: 84.812 - mean_q: -112.295 Interval 8304 (4151500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5899 7 episodes - episode_reward: -176.516 [-261.219, -76.198] - loss: 12.328 - mae: 84.811 - mean_q: -112.292 Interval 8305 (4152000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.4961 8 episodes - episode_reward: -346.495 [-798.393, -154.337] - loss: 12.411 - mae: 84.839 - mean_q: -112.333 Interval 8306 (4152500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5548 8 episodes - episode_reward: -174.291 [-236.424, -132.179] - loss: 11.937 - mae: 84.875 - mean_q: -112.354 Interval 8307 (4153000 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -2.6905 7 episodes - episode_reward: -185.646 [-270.701, -13.067] - loss: 11.901 - mae: 84.901 - mean_q: -112.361 Interval 8308 (4153500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9453 8 episodes - episode_reward: -180.428 [-218.448, -117.683] - loss: 13.430 - mae: 84.928 - mean_q: -112.387 Interval 8309 (4154000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4434 8 episodes - episode_reward: -222.035 [-402.683, -117.036] - loss: 10.317 - mae: 84.934 - mean_q: -112.418 Interval 8310 (4154500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0466 8 episodes - episode_reward: -192.115 [-271.897, -121.813] - loss: 13.905 - mae: 84.990 - mean_q: -112.456 Interval 8311 (4155000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7427 7 episodes - episode_reward: -191.938 [-257.832, -153.462] - loss: 9.827 - mae: 84.996 - mean_q: -112.495 Interval 8312 (4155500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5414 7 episodes - episode_reward: -164.380 [-198.785, -151.997] - loss: 11.343 - mae: 85.044 - mean_q: -112.524 Interval 8313 (4156000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9789 8 episodes - episode_reward: -195.440 [-354.311, -100.000] - loss: 14.058 - mae: 85.052 - mean_q: -112.566 Interval 8314 (4156500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3104 6 episodes - episode_reward: -187.919 [-217.075, -136.546] - loss: 10.239 - mae: 85.065 - mean_q: -112.584 Interval 8315 (4157000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6332 7 episodes - episode_reward: -179.246 [-217.640, -113.691] - loss: 11.029 - mae: 85.097 - mean_q: -112.614 Interval 8316 (4157500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7613 8 episodes - episode_reward: -183.897 [-281.657, -140.400] - loss: 9.655 - mae: 85.117 - mean_q: -112.655 Interval 8317 (4158000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.6576 9 episodes - episode_reward: -374.885 [-774.906, -120.309] - loss: 8.604 - mae: 85.132 - mean_q: -112.703 Interval 8318 (4158500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8436 8 episodes - episode_reward: -244.856 [-362.733, -175.060] - loss: 11.345 - mae: 85.207 - mean_q: -112.796 Interval 8319 (4159000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7222 7 episodes - episode_reward: -193.506 [-246.340, -150.543] - loss: 15.434 - mae: 85.267 - mean_q: -112.836 Interval 8320 (4159500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7902 7 episodes - episode_reward: -196.714 [-267.079, -154.119] - loss: 11.393 - mae: 85.284 - mean_q: -112.847 Interval 8321 (4160000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9669 8 episodes - episode_reward: -180.744 [-229.211, -138.728] - loss: 10.727 - mae: 85.325 - mean_q: -112.869 Interval 8322 (4160500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5218 10 episodes - episode_reward: -181.332 [-257.336, -108.570] - loss: 12.381 - mae: 85.365 - mean_q: -112.907 Interval 8323 (4161000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0631 7 episodes - episode_reward: -207.156 [-268.005, -159.706] - loss: 10.437 - mae: 85.409 - mean_q: -112.922 Interval 8324 (4161500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4652 7 episodes - episode_reward: -188.951 [-280.388, -34.900] - loss: 14.530 - mae: 85.472 - mean_q: -112.931 Interval 8325 (4162000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2751 9 episodes - episode_reward: -181.018 [-279.461, -140.370] - loss: 10.159 - mae: 85.492 - mean_q: -112.963 Interval 8326 (4162500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9981 9 episodes - episode_reward: -162.766 [-240.303, -100.000] - loss: 13.335 - mae: 85.540 - mean_q: -112.997 Interval 8327 (4163000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4995 7 episodes - episode_reward: -187.239 [-436.041, -54.630] - loss: 14.114 - mae: 85.579 - mean_q: -113.016 Interval 8328 (4163500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8508 7 episodes - episode_reward: -194.569 [-248.305, -141.536] - loss: 10.978 - mae: 85.602 - mean_q: -113.032 Interval 8329 (4164000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8895 8 episodes - episode_reward: -184.676 [-283.116, -114.773] - loss: 14.011 - mae: 85.656 - mean_q: -113.065 Interval 8330 (4164500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8613 8 episodes - episode_reward: -179.203 [-280.435, -84.885] - loss: 13.675 - mae: 85.673 - mean_q: -113.086 Interval 8331 (4165000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8011 7 episodes - episode_reward: -192.867 [-322.389, -143.463] - loss: 11.003 - mae: 85.688 - mean_q: -113.092 Interval 8332 (4165500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0118 6 episodes - episode_reward: -166.618 [-232.842, -138.685] - loss: 16.913 - mae: 85.719 - mean_q: -113.084 Interval 8333 (4166000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0038 6 episodes - episode_reward: -155.339 [-171.624, -121.538] - loss: 13.034 - mae: 85.733 - mean_q: -113.099 Interval 8334 (4166500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8078 8 episodes - episode_reward: -188.184 [-245.899, -100.000] - loss: 10.951 - mae: 85.741 - mean_q: -113.108 Interval 8335 (4167000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7694 7 episodes - episode_reward: -191.952 [-225.534, -159.829] - loss: 12.275 - mae: 85.777 - mean_q: -113.133 Interval 8336 (4167500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1596 8 episodes - episode_reward: -136.287 [-184.758, 8.077] - loss: 15.210 - mae: 85.794 - mean_q: -113.168 Interval 8337 (4168000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9089 8 episodes - episode_reward: -184.389 [-286.234, -138.312] - loss: 12.279 - mae: 85.825 - mean_q: -113.150 Interval 8338 (4168500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3219 8 episodes - episode_reward: -208.426 [-340.071, -134.420] - loss: 8.529 - mae: 85.817 - mean_q: -113.183 Interval 8339 (4169000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5693 11 episodes - episode_reward: -164.771 [-223.419, -100.000] - loss: 13.636 - mae: 85.877 - mean_q: -113.215 Interval 8340 (4169500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1255 9 episodes - episode_reward: -166.493 [-220.409, -121.659] - loss: 11.602 - mae: 85.866 - mean_q: -113.209 Interval 8341 (4170000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1714 10 episodes - episode_reward: -158.607 [-211.291, -100.000] - loss: 11.567 - mae: 85.885 - mean_q: -113.236 Interval 8342 (4170500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1002 7 episodes - episode_reward: -152.691 [-196.300, -51.729] - loss: 11.795 - mae: 85.904 - mean_q: -113.262 Interval 8343 (4171000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1099 9 episodes - episode_reward: -177.971 [-215.621, -132.866] - loss: 9.376 - mae: 85.899 - mean_q: -113.286 Interval 8344 (4171500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1242 8 episodes - episode_reward: -184.509 [-278.035, -100.000] - loss: 9.181 - mae: 85.926 - mean_q: -113.328 Interval 8345 (4172000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9190 9 episodes - episode_reward: -171.431 [-221.092, -112.019] - loss: 13.887 - mae: 85.961 - mean_q: -113.350 Interval 8346 (4172500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2653 8 episodes - episode_reward: -141.786 [-253.118, 35.375] - loss: 11.991 - mae: 85.969 - mean_q: -113.352 Interval 8347 (4173000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6711 6 episodes - episode_reward: -209.136 [-300.929, -159.979] - loss: 7.878 - mae: 85.956 - mean_q: -113.358 Interval 8348 (4173500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8626 8 episodes - episode_reward: -187.657 [-271.961, -141.073] - loss: 12.888 - mae: 85.996 - mean_q: -113.371 Interval 8349 (4174000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5120 8 episodes - episode_reward: -158.172 [-219.936, -113.555] - loss: 9.859 - mae: 85.988 - mean_q: -113.393 Interval 8350 (4174500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3209 6 episodes - episode_reward: -188.647 [-368.880, -56.706] - loss: 10.017 - mae: 86.005 - mean_q: -113.420 Interval 8351 (4175000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4810 9 episodes - episode_reward: -197.269 [-308.479, -137.624] - loss: 8.032 - mae: 86.027 - mean_q: -113.424 Interval 8352 (4175500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6228 7 episodes - episode_reward: -186.334 [-227.645, -149.417] - loss: 11.194 - mae: 86.059 - mean_q: -113.437 Interval 8353 (4176000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5934 7 episodes - episode_reward: -176.368 [-235.590, -116.476] - loss: 9.367 - mae: 86.064 - mean_q: -113.452 Interval 8354 (4176500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3400 7 episodes - episode_reward: -156.095 [-231.910, -4.562] - loss: 13.053 - mae: 86.092 - mean_q: -113.446 Interval 8355 (4177000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2673 8 episodes - episode_reward: -154.675 [-269.128, -9.198] - loss: 13.452 - mae: 86.094 - mean_q: -113.456 Interval 8356 (4177500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0128 8 episodes - episode_reward: -170.305 [-226.759, -131.507] - loss: 10.221 - mae: 86.090 - mean_q: -113.464 Interval 8357 (4178000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6200 11 episodes - episode_reward: -133.179 [-308.172, 89.613] - loss: 8.327 - mae: 86.098 - mean_q: -113.473 Interval 8358 (4178500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4564 8 episodes - episode_reward: -164.203 [-254.197, -100.000] - loss: 8.042 - mae: 86.113 - mean_q: -113.498 Interval 8359 (4179000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0832 7 episodes - episode_reward: -139.512 [-198.489, 2.440] - loss: 9.373 - mae: 86.138 - mean_q: -113.517 Interval 8360 (4179500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2050 7 episodes - episode_reward: -225.195 [-346.259, -125.289] - loss: 13.838 - mae: 86.150 - mean_q: -113.497 Interval 8361 (4180000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.1259 8 episodes - episode_reward: -194.246 [-331.716, -137.054] - loss: 12.359 - mae: 86.142 - mean_q: -113.481 Interval 8362 (4180500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2162 9 episodes - episode_reward: -177.803 [-210.444, -111.228] - loss: 9.873 - mae: 86.156 - mean_q: -113.465 Interval 8363 (4181000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9483 9 episodes - episode_reward: -166.283 [-281.772, -123.743] - loss: 8.392 - mae: 86.139 - mean_q: -113.463 Interval 8364 (4181500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5640 8 episodes - episode_reward: -156.544 [-204.782, -87.184] - loss: 12.286 - mae: 86.162 - mean_q: -113.454 Interval 8365 (4182000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8000 7 episodes - episode_reward: -198.453 [-271.562, -148.809] - loss: 11.003 - mae: 86.123 - mean_q: -113.440 Interval 8366 (4182500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9741 7 episodes - episode_reward: -214.779 [-339.652, -153.998] - loss: 10.651 - mae: 86.082 - mean_q: -113.422 Interval 8367 (4183000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9807 9 episodes - episode_reward: -159.184 [-295.097, -100.000] - loss: 11.651 - mae: 86.056 - mean_q: -113.396 Interval 8368 (4183500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1080 8 episodes - episode_reward: -202.143 [-289.982, -153.117] - loss: 9.940 - mae: 86.021 - mean_q: -113.386 Interval 8369 (4184000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2082 9 episodes - episode_reward: -171.595 [-227.713, -133.220] - loss: 11.272 - mae: 85.986 - mean_q: -113.389 Interval 8370 (4184500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.0006 6 episodes - episode_reward: -160.915 [-240.342, -105.331] - loss: 13.319 - mae: 85.962 - mean_q: -113.346 Interval 8371 (4185000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1137 2 episodes - episode_reward: -911.502 [-1595.317, -227.688] - loss: 10.203 - mae: 85.908 - mean_q: -113.253 Interval 8372 (4185500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.4652 2 episodes - episode_reward: -1170.773 [-1706.705, -634.841] - loss: 10.950 - mae: 85.894 - mean_q: -113.224 Interval 8373 (4186000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5424 6 episodes - episode_reward: -328.521 [-1102.550, -95.544] - loss: 9.187 - mae: 85.899 - mean_q: -113.263 Interval 8374 (4186500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6389 9 episodes - episode_reward: -209.312 [-420.171, -96.465] - loss: 10.565 - mae: 85.915 - mean_q: -113.276 Interval 8375 (4187000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4816 8 episodes - episode_reward: -213.243 [-295.686, -172.330] - loss: 7.611 - mae: 85.922 - mean_q: -113.277 Interval 8376 (4187500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3606 7 episodes - episode_reward: -234.527 [-507.464, -48.260] - loss: 11.728 - mae: 85.966 - mean_q: -113.256 Interval 8377 (4188000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3443 6 episodes - episode_reward: -197.078 [-311.184, -127.992] - loss: 12.364 - mae: 85.985 - mean_q: -113.226 Interval 8378 (4188500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7031 8 episodes - episode_reward: -173.730 [-273.522, -43.839] - loss: 12.811 - mae: 85.992 - mean_q: -113.199 Interval 8379 (4189000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9351 8 episodes - episode_reward: -183.715 [-293.733, 21.851] - loss: 11.268 - mae: 86.015 - mean_q: -113.176 Interval 8380 (4189500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6551 7 episodes - episode_reward: -184.571 [-261.405, -129.413] - loss: 10.066 - mae: 86.021 - mean_q: -113.147 Interval 8381 (4190000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8332 9 episodes - episode_reward: -156.926 [-234.792, -85.130] - loss: 9.809 - mae: 86.025 - mean_q: -113.167 Interval 8382 (4190500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6004 9 episodes - episode_reward: -138.202 [-227.573, -18.213] - loss: 11.719 - mae: 86.042 - mean_q: -113.163 Interval 8383 (4191000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1994 7 episodes - episode_reward: -170.587 [-262.059, -129.726] - loss: 10.546 - mae: 86.029 - mean_q: -113.139 Interval 8384 (4191500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7737 9 episodes - episode_reward: -154.863 [-195.637, -127.933] - loss: 12.533 - mae: 86.030 - mean_q: -113.118 Interval 8385 (4192000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7216 7 episodes - episode_reward: -181.973 [-312.317, -98.752] - loss: 10.075 - mae: 86.032 - mean_q: -113.077 Interval 8386 (4192500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0725 9 episodes - episode_reward: -178.281 [-222.902, -100.000] - loss: 12.165 - mae: 86.019 - mean_q: -113.063 Interval 8387 (4193000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1935 8 episodes - episode_reward: -133.725 [-198.146, -5.641] - loss: 9.473 - mae: 85.999 - mean_q: -113.051 Interval 8388 (4193500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.2629 6 episodes - episode_reward: -159.171 [-213.027, -113.336] - loss: 11.012 - mae: 86.006 - mean_q: -113.049 Interval 8389 (4194000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6714 7 episodes - episode_reward: -224.534 [-368.623, -169.708] - loss: 12.499 - mae: 86.004 - mean_q: -113.019 Interval 8390 (4194500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8307 8 episodes - episode_reward: -176.240 [-239.999, -121.194] - loss: 12.755 - mae: 86.000 - mean_q: -112.997 Interval 8391 (4195000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7175 8 episodes - episode_reward: -160.758 [-277.185, -100.000] - loss: 10.409 - mae: 85.970 - mean_q: -112.961 Interval 8392 (4195500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8022 7 episodes - episode_reward: -205.632 [-367.234, -128.132] - loss: 14.410 - mae: 85.966 - mean_q: -112.930 Interval 8393 (4196000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5680 9 episodes - episode_reward: -142.804 [-271.069, 58.953] - loss: 13.158 - mae: 85.944 - mean_q: -112.907 Interval 8394 (4196500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0618 9 episodes - episode_reward: -174.743 [-287.498, -100.000] - loss: 12.810 - mae: 85.914 - mean_q: -112.891 Interval 8395 (4197000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4368 7 episodes - episode_reward: -172.139 [-286.454, -119.450] - loss: 12.169 - mae: 85.886 - mean_q: -112.851 Interval 8396 (4197500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7839 8 episodes - episode_reward: -174.660 [-317.530, -20.031] - loss: 11.225 - mae: 85.867 - mean_q: -112.841 Interval 8397 (4198000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6754 6 episodes - episode_reward: -199.369 [-275.916, -147.283] - loss: 12.469 - mae: 85.849 - mean_q: -112.841 Interval 8398 (4198500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7459 8 episodes - episode_reward: -184.640 [-251.651, -100.000] - loss: 13.409 - mae: 85.835 - mean_q: -112.847 Interval 8399 (4199000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7936 8 episodes - episode_reward: -177.396 [-250.879, -122.453] - loss: 10.368 - mae: 85.818 - mean_q: -112.834 Interval 8400 (4199500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0029 8 episodes - episode_reward: -191.743 [-251.734, -143.254] - loss: 11.026 - mae: 85.830 - mean_q: -112.829 Interval 8401 (4200000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5612 6 episodes - episode_reward: -199.667 [-246.822, -147.934] - loss: 9.506 - mae: 85.811 - mean_q: -112.831 Interval 8402 (4200500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7371 7 episodes - episode_reward: -191.137 [-262.796, -135.662] - loss: 11.814 - mae: 85.818 - mean_q: -112.842 Interval 8403 (4201000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6435 7 episodes - episode_reward: -204.347 [-238.707, -185.276] - loss: 10.944 - mae: 85.802 - mean_q: -112.826 Interval 8404 (4201500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6113 7 episodes - episode_reward: -187.818 [-266.073, -105.936] - loss: 12.284 - mae: 85.789 - mean_q: -112.802 Interval 8405 (4202000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2769 9 episodes - episode_reward: -174.599 [-266.911, -109.376] - loss: 11.802 - mae: 85.758 - mean_q: -112.780 Interval 8406 (4202500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9538 10 episodes - episode_reward: -151.032 [-230.861, -86.776] - loss: 11.236 - mae: 85.745 - mean_q: -112.762 Interval 8407 (4203000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5803 7 episodes - episode_reward: -180.650 [-233.516, -128.172] - loss: 7.865 - mae: 85.709 - mean_q: -112.758 Interval 8408 (4203500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0692 8 episodes - episode_reward: -194.794 [-365.388, -144.433] - loss: 13.498 - mae: 85.697 - mean_q: -112.754 Interval 8409 (4204000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9902 7 episodes - episode_reward: -205.321 [-334.980, -132.004] - loss: 11.523 - mae: 85.666 - mean_q: -112.713 Interval 8410 (4204500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0843 9 episodes - episode_reward: -178.764 [-265.679, -100.000] - loss: 7.939 - mae: 85.642 - mean_q: -112.713 Interval 8411 (4205000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4180 6 episodes - episode_reward: -190.992 [-254.429, -6.291] - loss: 9.684 - mae: 85.650 - mean_q: -112.708 Interval 8412 (4205500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1044 10 episodes - episode_reward: -111.527 [-216.727, 58.048] - loss: 15.167 - mae: 85.659 - mean_q: -112.684 Interval 8413 (4206000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8222 7 episodes - episode_reward: -195.037 [-253.698, -147.978] - loss: 12.522 - mae: 85.634 - mean_q: -112.626 Interval 8414 (4206500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9259 8 episodes - episode_reward: -178.218 [-248.517, -80.934] - loss: 13.763 - mae: 85.604 - mean_q: -112.593 Interval 8415 (4207000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7904 8 episodes - episode_reward: -186.198 [-252.578, -136.695] - loss: 11.983 - mae: 85.568 - mean_q: -112.562 Interval 8416 (4207500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0786 9 episodes - episode_reward: -171.251 [-357.070, -100.000] - loss: 11.267 - mae: 85.528 - mean_q: -112.511 Interval 8417 (4208000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5940 6 episodes - episode_reward: -196.682 [-264.859, -147.200] - loss: 9.738 - mae: 85.479 - mean_q: -112.505 Interval 8418 (4208500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7276 9 episodes - episode_reward: -162.167 [-240.468, 1.357] - loss: 13.290 - mae: 85.421 - mean_q: -112.498 Interval 8419 (4209000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7476 8 episodes - episode_reward: -170.839 [-288.047, 41.655] - loss: 12.525 - mae: 85.375 - mean_q: -112.462 Interval 8420 (4209500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0179 8 episodes - episode_reward: -187.350 [-291.377, -128.407] - loss: 13.352 - mae: 85.331 - mean_q: -112.419 Interval 8421 (4210000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2041 8 episodes - episode_reward: -192.645 [-304.026, -139.194] - loss: 9.219 - mae: 85.257 - mean_q: -112.378 Interval 8422 (4210500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0539 9 episodes - episode_reward: -178.754 [-292.906, -93.368] - loss: 14.051 - mae: 85.240 - mean_q: -112.355 Interval 8423 (4211000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8091 7 episodes - episode_reward: -200.192 [-359.732, -22.573] - loss: 12.951 - mae: 85.200 - mean_q: -112.314 Interval 8424 (4211500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2886 7 episodes - episode_reward: -169.190 [-239.465, -58.974] - loss: 12.732 - mae: 85.154 - mean_q: -112.273 Interval 8425 (4212000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6741 8 episodes - episode_reward: -160.334 [-215.997, -61.355] - loss: 12.706 - mae: 85.103 - mean_q: -112.269 Interval 8426 (4212500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5631 6 episodes - episode_reward: -208.485 [-395.523, -123.037] - loss: 9.177 - mae: 85.062 - mean_q: -112.245 Interval 8427 (4213000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3684 6 episodes - episode_reward: -200.254 [-284.546, -130.738] - loss: 11.603 - mae: 85.060 - mean_q: -112.234 Interval 8428 (4213500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6551 7 episodes - episode_reward: -192.798 [-253.894, -117.172] - loss: 9.296 - mae: 85.006 - mean_q: -112.234 Interval 8429 (4214000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4114 6 episodes - episode_reward: -190.441 [-248.108, -154.243] - loss: 12.477 - mae: 84.989 - mean_q: -112.217 Interval 8430 (4214500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6876 8 episodes - episode_reward: -168.345 [-225.830, -128.595] - loss: 10.283 - mae: 84.956 - mean_q: -112.181 Interval 8431 (4215000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5489 8 episodes - episode_reward: -161.087 [-261.670, 11.224] - loss: 10.832 - mae: 84.920 - mean_q: -112.180 Interval 8432 (4215500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5658 7 episodes - episode_reward: -179.581 [-289.888, -132.688] - loss: 11.276 - mae: 84.892 - mean_q: -112.165 Interval 8433 (4216000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3371 9 episodes - episode_reward: -193.121 [-227.390, -157.447] - loss: 11.924 - mae: 84.877 - mean_q: -112.154 Interval 8434 (4216500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0227 9 episodes - episode_reward: -167.220 [-219.529, -136.352] - loss: 11.748 - mae: 84.862 - mean_q: -112.132 Interval 8435 (4217000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4643 10 episodes - episode_reward: -176.830 [-219.519, -100.000] - loss: 14.256 - mae: 84.855 - mean_q: -112.101 Interval 8436 (4217500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0626 9 episodes - episode_reward: -164.933 [-262.110, -111.251] - loss: 15.565 - mae: 84.828 - mean_q: -112.046 Interval 8437 (4218000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7347 7 episodes - episode_reward: -197.851 [-278.458, -136.951] - loss: 11.554 - mae: 84.788 - mean_q: -112.017 Interval 8438 (4218500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8991 8 episodes - episode_reward: -184.313 [-244.693, -144.192] - loss: 14.002 - mae: 84.774 - mean_q: -111.996 Interval 8439 (4219000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8403 8 episodes - episode_reward: -172.058 [-233.922, -124.756] - loss: 13.205 - mae: 84.747 - mean_q: -111.944 Interval 8440 (4219500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1725 8 episodes - episode_reward: -186.447 [-212.940, -111.897] - loss: 12.464 - mae: 84.708 - mean_q: -111.917 Interval 8441 (4220000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6071 9 episodes - episode_reward: -211.405 [-266.818, -110.990] - loss: 16.483 - mae: 84.705 - mean_q: -111.915 Interval 8442 (4220500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0526 7 episodes - episode_reward: -223.687 [-297.590, -167.699] - loss: 10.417 - mae: 84.642 - mean_q: -111.895 Interval 8443 (4221000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6836 6 episodes - episode_reward: -128.976 [-297.587, 47.172] - loss: 15.189 - mae: 84.637 - mean_q: -111.879 Interval 8444 (4221500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6973 7 episodes - episode_reward: -193.536 [-311.756, -126.085] - loss: 10.701 - mae: 84.616 - mean_q: -111.883 Interval 8445 (4222000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2145 8 episodes - episode_reward: -208.747 [-364.421, -117.216] - loss: 8.534 - mae: 84.593 - mean_q: -111.906 Interval 8446 (4222500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6843 8 episodes - episode_reward: -163.821 [-204.681, -134.274] - loss: 13.348 - mae: 84.605 - mean_q: -111.909 Interval 8447 (4223000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5186 8 episodes - episode_reward: -160.084 [-216.669, -116.402] - loss: 9.507 - mae: 84.595 - mean_q: -111.914 Interval 8448 (4223500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1446 7 episodes - episode_reward: -212.756 [-421.718, -100.000] - loss: 14.132 - mae: 84.603 - mean_q: -111.952 Interval 8449 (4224000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9781 8 episodes - episode_reward: -196.558 [-334.839, -23.275] - loss: 16.297 - mae: 84.609 - mean_q: -111.890 Interval 8450 (4224500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2753 8 episodes - episode_reward: -140.259 [-188.516, 13.555] - loss: 13.448 - mae: 84.581 - mean_q: -111.877 Interval 8451 (4225000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0828 10 episodes - episode_reward: -155.850 [-246.342, -100.000] - loss: 17.914 - mae: 84.581 - mean_q: -111.855 Interval 8452 (4225500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1619 9 episodes - episode_reward: -167.106 [-271.913, 91.593] - loss: 9.532 - mae: 84.516 - mean_q: -111.853 Interval 8453 (4226000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4402 8 episodes - episode_reward: -208.480 [-266.598, -141.259] - loss: 15.693 - mae: 84.537 - mean_q: -111.879 Interval 8454 (4226500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8607 8 episodes - episode_reward: -195.054 [-328.057, 15.586] - loss: 14.008 - mae: 84.538 - mean_q: -111.870 Interval 8455 (4227000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7276 6 episodes - episode_reward: -135.499 [-204.037, -10.702] - loss: 15.123 - mae: 84.519 - mean_q: -111.870 Interval 8456 (4227500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4810 7 episodes - episode_reward: -185.262 [-304.393, -4.885] - loss: 11.066 - mae: 84.494 - mean_q: -111.854 Interval 8457 (4228000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6904 7 episodes - episode_reward: -187.019 [-229.710, -155.542] - loss: 13.427 - mae: 84.511 - mean_q: -111.863 Interval 8458 (4228500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3212 7 episodes - episode_reward: -170.982 [-306.394, -44.734] - loss: 13.940 - mae: 84.495 - mean_q: -111.863 Interval 8459 (4229000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9614 8 episodes - episode_reward: -186.153 [-243.335, -118.776] - loss: 15.549 - mae: 84.500 - mean_q: -111.857 Interval 8460 (4229500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -4.5464 8 episodes - episode_reward: -280.041 [-614.596, -114.297] - loss: 12.515 - mae: 84.487 - mean_q: -111.869 Interval 8461 (4230000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5515 7 episodes - episode_reward: -320.949 [-746.353, -192.422] - loss: 15.738 - mae: 84.522 - mean_q: -111.864 Interval 8462 (4230500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0595 8 episodes - episode_reward: -181.457 [-244.419, -109.443] - loss: 12.230 - mae: 84.523 - mean_q: -111.849 Interval 8463 (4231000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2784 9 episodes - episode_reward: -187.531 [-266.286, -120.240] - loss: 13.905 - mae: 84.561 - mean_q: -111.858 Interval 8464 (4231500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6945 7 episodes - episode_reward: -188.093 [-312.608, -45.727] - loss: 8.674 - mae: 84.569 - mean_q: -111.872 Interval 8465 (4232000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3150 8 episodes - episode_reward: -202.687 [-257.744, -146.660] - loss: 10.275 - mae: 84.597 - mean_q: -111.889 Interval 8466 (4232500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8239 8 episodes - episode_reward: -188.510 [-372.393, -113.274] - loss: 10.641 - mae: 84.635 - mean_q: -111.906 Interval 8467 (4233000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1580 7 episodes - episode_reward: -152.934 [-206.619, -81.587] - loss: 14.746 - mae: 84.660 - mean_q: -111.936 Interval 8468 (4233500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0890 6 episodes - episode_reward: -166.420 [-212.962, -135.073] - loss: 17.449 - mae: 84.682 - mean_q: -111.913 Interval 8469 (4234000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8369 8 episodes - episode_reward: -191.357 [-298.262, -83.083] - loss: 16.988 - mae: 84.683 - mean_q: -111.881 Interval 8470 (4234500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6423 7 episodes - episode_reward: -169.529 [-253.306, -122.936] - loss: 11.445 - mae: 84.674 - mean_q: -111.868 Interval 8471 (4235000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4295 8 episodes - episode_reward: -167.027 [-234.783, -52.338] - loss: 15.455 - mae: 84.655 - mean_q: -111.850 Interval 8472 (4235500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7093 8 episodes - episode_reward: -154.259 [-181.960, -108.169] - loss: 18.478 - mae: 84.637 - mean_q: -111.833 Interval 8473 (4236000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7992 7 episodes - episode_reward: -205.994 [-259.197, -160.786] - loss: 19.776 - mae: 84.610 - mean_q: -111.768 Interval 8474 (4236500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0368 7 episodes - episode_reward: -224.251 [-285.831, -161.202] - loss: 15.567 - mae: 84.564 - mean_q: -111.737 Interval 8475 (4237000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4885 7 episodes - episode_reward: -173.774 [-217.961, -115.563] - loss: 13.609 - mae: 84.534 - mean_q: -111.720 Interval 8476 (4237500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6179 7 episodes - episode_reward: -191.317 [-234.234, -146.249] - loss: 13.360 - mae: 84.489 - mean_q: -111.715 Interval 8477 (4238000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2315 5 episodes - episode_reward: -493.197 [-935.706, -293.203] - loss: 12.134 - mae: 84.483 - mean_q: -111.666 Interval 8478 (4238500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -4.7778 3 episodes - episode_reward: -712.656 [-1015.941, -216.459] - loss: 14.070 - mae: 84.470 - mean_q: -111.637 Interval 8479 (4239000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2222 7 episodes - episode_reward: -361.227 [-597.307, -204.410] - loss: 11.310 - mae: 84.478 - mean_q: -111.665 Interval 8480 (4239500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6823 7 episodes - episode_reward: -184.794 [-225.222, -142.971] - loss: 13.074 - mae: 84.494 - mean_q: -111.653 Interval 8481 (4240000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4233 7 episodes - episode_reward: -178.224 [-259.117, -38.052] - loss: 13.194 - mae: 84.497 - mean_q: -111.653 Interval 8482 (4240500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5285 6 episodes - episode_reward: -196.321 [-312.783, -110.654] - loss: 13.819 - mae: 84.507 - mean_q: -111.669 Interval 8483 (4241000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8895 8 episodes - episode_reward: -182.511 [-223.225, -136.440] - loss: 11.843 - mae: 84.516 - mean_q: -111.660 Interval 8484 (4241500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2898 8 episodes - episode_reward: -144.749 [-208.156, -34.388] - loss: 9.878 - mae: 84.514 - mean_q: -111.688 Interval 8485 (4242000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7343 8 episodes - episode_reward: -183.409 [-353.246, -113.317] - loss: 13.529 - mae: 84.535 - mean_q: -111.687 Interval 8486 (4242500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1216 7 episodes - episode_reward: -144.768 [-248.924, 31.210] - loss: 15.178 - mae: 84.542 - mean_q: -111.689 Interval 8487 (4243000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8066 8 episodes - episode_reward: -175.050 [-210.799, -100.000] - loss: 14.643 - mae: 84.511 - mean_q: -111.706 Interval 8488 (4243500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8389 9 episodes - episode_reward: -157.743 [-218.766, -122.170] - loss: 11.690 - mae: 84.509 - mean_q: -111.722 Interval 8489 (4244000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1659 10 episodes - episode_reward: -155.985 [-216.773, -123.402] - loss: 10.362 - mae: 84.520 - mean_q: -111.743 Interval 8490 (4244500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0110 8 episodes - episode_reward: -195.367 [-292.649, -101.343] - loss: 15.020 - mae: 84.565 - mean_q: -111.730 Interval 8491 (4245000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2489 7 episodes - episode_reward: -151.478 [-182.898, -105.783] - loss: 15.858 - mae: 84.569 - mean_q: -111.697 Interval 8492 (4245500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3769 8 episodes - episode_reward: -151.200 [-203.323, -19.201] - loss: 12.732 - mae: 84.565 - mean_q: -111.672 Interval 8493 (4246000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9918 8 episodes - episode_reward: -180.333 [-229.276, -148.450] - loss: 8.895 - mae: 84.547 - mean_q: -111.668 Interval 8494 (4246500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9301 9 episodes - episode_reward: -169.198 [-275.733, -99.248] - loss: 9.206 - mae: 84.538 - mean_q: -111.685 Interval 8495 (4247000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5248 10 episodes - episode_reward: -180.503 [-293.079, -120.627] - loss: 10.028 - mae: 84.562 - mean_q: -111.690 Interval 8496 (4247500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3193 9 episodes - episode_reward: -183.652 [-255.250, -108.224] - loss: 10.639 - mae: 84.566 - mean_q: -111.717 Interval 8497 (4248000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7499 7 episodes - episode_reward: -193.225 [-301.704, -120.458] - loss: 13.232 - mae: 84.558 - mean_q: -111.722 Interval 8498 (4248500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9272 9 episodes - episode_reward: -161.504 [-264.310, 4.899] - loss: 11.602 - mae: 84.572 - mean_q: -111.719 Interval 8499 (4249000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0693 9 episodes - episode_reward: -166.900 [-267.919, -100.000] - loss: 11.234 - mae: 84.584 - mean_q: -111.714 Interval 8500 (4249500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1093 8 episodes - episode_reward: -198.981 [-245.159, -142.541] - loss: 11.388 - mae: 84.581 - mean_q: -111.717 Interval 8501 (4250000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0254 9 episodes - episode_reward: -172.987 [-226.067, -109.151] - loss: 13.787 - mae: 84.597 - mean_q: -111.704 Interval 8502 (4250500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4170 6 episodes - episode_reward: -191.354 [-232.571, -155.217] - loss: 14.049 - mae: 84.601 - mean_q: -111.683 Interval 8503 (4251000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3088 9 episodes - episode_reward: -184.099 [-246.461, -100.000] - loss: 16.380 - mae: 84.593 - mean_q: -111.669 Interval 8504 (4251500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1422 7 episodes - episode_reward: -152.140 [-259.121, -1.151] - loss: 9.049 - mae: 84.570 - mean_q: -111.651 Interval 8505 (4252000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.7481 10 episodes - episode_reward: -193.765 [-245.090, -111.184] - loss: 14.847 - mae: 84.598 - mean_q: -111.658 Interval 8506 (4252500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1177 6 episodes - episode_reward: -170.478 [-201.347, -157.553] - loss: 12.765 - mae: 84.586 - mean_q: -111.646 Interval 8507 (4253000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1092 8 episodes - episode_reward: -196.066 [-244.348, -139.212] - loss: 14.314 - mae: 84.598 - mean_q: -111.621 Interval 8508 (4253500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8433 6 episodes - episode_reward: -232.097 [-322.935, -160.442] - loss: 15.641 - mae: 84.606 - mean_q: -111.598 Interval 8509 (4254000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9681 8 episodes - episode_reward: -188.369 [-264.175, -125.236] - loss: 12.859 - mae: 84.586 - mean_q: -111.570 Interval 8510 (4254500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7558 7 episodes - episode_reward: -180.791 [-261.031, -124.695] - loss: 12.959 - mae: 84.580 - mean_q: -111.582 Interval 8511 (4255000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1183 7 episodes - episode_reward: -222.619 [-309.474, -127.210] - loss: 10.574 - mae: 84.580 - mean_q: -111.586 Interval 8512 (4255500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8283 8 episodes - episode_reward: -187.489 [-252.301, -138.831] - loss: 11.269 - mae: 84.588 - mean_q: -111.596 Interval 8513 (4256000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0497 7 episodes - episode_reward: -212.593 [-243.099, -177.704] - loss: 13.143 - mae: 84.604 - mean_q: -111.598 Interval 8514 (4256500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3720 8 episodes - episode_reward: -153.028 [-214.090, -3.367] - loss: 11.429 - mae: 84.580 - mean_q: -111.621 Interval 8515 (4257000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9201 8 episodes - episode_reward: -184.282 [-232.125, -150.157] - loss: 15.057 - mae: 84.607 - mean_q: -111.619 Interval 8516 (4257500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7370 7 episodes - episode_reward: -198.956 [-291.050, -144.326] - loss: 12.951 - mae: 84.587 - mean_q: -111.610 Interval 8517 (4258000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4725 9 episodes - episode_reward: -192.264 [-284.813, -119.284] - loss: 11.818 - mae: 84.569 - mean_q: -111.603 Interval 8518 (4258500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9037 8 episodes - episode_reward: -176.503 [-229.225, -114.505] - loss: 9.615 - mae: 84.561 - mean_q: -111.623 Interval 8519 (4259000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5466 11 episodes - episode_reward: -163.309 [-245.573, -100.000] - loss: 11.581 - mae: 84.591 - mean_q: -111.647 Interval 8520 (4259500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4148 10 episodes - episode_reward: -216.998 [-696.897, -28.724] - loss: 10.422 - mae: 84.586 - mean_q: -111.662 Interval 8521 (4260000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5971 8 episodes - episode_reward: -226.201 [-536.789, 6.818] - loss: 11.812 - mae: 84.613 - mean_q: -111.671 Interval 8522 (4260500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1132 9 episodes - episode_reward: -180.829 [-291.911, -100.000] - loss: 12.563 - mae: 84.636 - mean_q: -111.690 Interval 8523 (4261000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2433 9 episodes - episode_reward: -179.015 [-278.597, -108.845] - loss: 11.577 - mae: 84.637 - mean_q: -111.708 Interval 8524 (4261500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3372 7 episodes - episode_reward: -165.723 [-282.642, -97.046] - loss: 15.343 - mae: 84.679 - mean_q: -111.686 Interval 8525 (4262000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0758 8 episodes - episode_reward: -189.257 [-289.200, -100.000] - loss: 15.093 - mae: 84.685 - mean_q: -111.661 Interval 8526 (4262500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2669 6 episodes - episode_reward: -178.281 [-262.662, -64.073] - loss: 10.350 - mae: 84.659 - mean_q: -111.663 Interval 8527 (4263000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3901 8 episodes - episode_reward: -158.099 [-235.154, -10.018] - loss: 11.680 - mae: 84.670 - mean_q: -111.665 Interval 8528 (4263500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7016 8 episodes - episode_reward: -171.428 [-226.769, -120.819] - loss: 12.400 - mae: 84.683 - mean_q: -111.674 Interval 8529 (4264000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4945 7 episodes - episode_reward: -248.565 [-354.142, -174.531] - loss: 9.108 - mae: 84.674 - mean_q: -111.673 Interval 8530 (4264500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2164 7 episodes - episode_reward: -223.154 [-330.279, -126.317] - loss: 10.350 - mae: 84.685 - mean_q: -111.698 Interval 8531 (4265000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7522 9 episodes - episode_reward: -154.374 [-257.087, 4.430] - loss: 15.532 - mae: 84.733 - mean_q: -111.704 Interval 8532 (4265500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1550 8 episodes - episode_reward: -201.157 [-271.121, -134.949] - loss: 11.089 - mae: 84.733 - mean_q: -111.710 Interval 8533 (4266000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9216 7 episodes - episode_reward: -198.657 [-241.954, -140.560] - loss: 10.554 - mae: 84.749 - mean_q: -111.737 Interval 8534 (4266500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3044 10 episodes - episode_reward: -172.672 [-210.160, -100.000] - loss: 10.454 - mae: 84.763 - mean_q: -111.765 Interval 8535 (4267000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1699 7 episodes - episode_reward: -148.042 [-190.751, -86.455] - loss: 9.059 - mae: 84.783 - mean_q: -111.801 Interval 8536 (4267500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1218 7 episodes - episode_reward: -214.148 [-272.823, -149.951] - loss: 13.261 - mae: 84.809 - mean_q: -111.818 Interval 8537 (4268000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7237 8 episodes - episode_reward: -161.304 [-258.663, -100.000] - loss: 9.944 - mae: 84.810 - mean_q: -111.844 Interval 8538 (4268500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2837 8 episodes - episode_reward: -165.170 [-270.819, -68.957] - loss: 7.168 - mae: 84.824 - mean_q: -111.900 Interval 8539 (4269000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9677 6 episodes - episode_reward: -152.754 [-243.508, 11.870] - loss: 12.247 - mae: 84.872 - mean_q: -111.915 Interval 8540 (4269500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3734 7 episodes - episode_reward: -158.067 [-235.149, -26.328] - loss: 12.182 - mae: 84.874 - mean_q: -111.894 Interval 8541 (4270000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3807 10 episodes - episode_reward: -183.362 [-264.071, -100.000] - loss: 15.006 - mae: 84.895 - mean_q: -111.879 Interval 8542 (4270500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5031 7 episodes - episode_reward: -148.862 [-263.055, -50.606] - loss: 15.491 - mae: 84.905 - mean_q: -111.869 Interval 8543 (4271000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0378 8 episodes - episode_reward: -211.278 [-281.879, -173.956] - loss: 9.726 - mae: 84.887 - mean_q: -111.868 Interval 8544 (4271500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1740 9 episodes - episode_reward: -177.952 [-215.569, -123.395] - loss: 13.249 - mae: 84.911 - mean_q: -111.864 Interval 8545 (4272000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1084 7 episodes - episode_reward: -158.082 [-188.764, -102.655] - loss: 10.416 - mae: 84.924 - mean_q: -111.873 Interval 8546 (4272500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1611 7 episodes - episode_reward: -152.636 [-217.518, -61.232] - loss: 11.845 - mae: 84.926 - mean_q: -111.882 Interval 8547 (4273000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5948 5 episodes - episode_reward: -231.412 [-470.030, -66.618] - loss: 10.545 - mae: 84.920 - mean_q: -111.879 Interval 8548 (4273500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4778 9 episodes - episode_reward: -208.130 [-357.923, -110.718] - loss: 12.456 - mae: 84.924 - mean_q: -111.898 Interval 8549 (4274000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3354 6 episodes - episode_reward: -193.951 [-266.672, -61.268] - loss: 13.867 - mae: 84.946 - mean_q: -111.895 Interval 8550 (4274500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7653 7 episodes - episode_reward: -200.419 [-256.267, -143.132] - loss: 13.298 - mae: 84.952 - mean_q: -111.868 Interval 8551 (4275000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2084 7 episodes - episode_reward: -152.202 [-271.601, 51.447] - loss: 11.918 - mae: 84.950 - mean_q: -111.866 Interval 8552 (4275500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9435 7 episodes - episode_reward: -205.369 [-342.552, -116.224] - loss: 9.757 - mae: 84.947 - mean_q: -111.853 Interval 8553 (4276000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2916 9 episodes - episode_reward: -187.545 [-238.507, -138.915] - loss: 8.813 - mae: 84.930 - mean_q: -111.865 Interval 8554 (4276500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6648 8 episodes - episode_reward: -155.159 [-270.200, 11.003] - loss: 8.902 - mae: 84.933 - mean_q: -111.888 Interval 8555 (4277000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9391 8 episodes - episode_reward: -194.782 [-244.268, -165.736] - loss: 10.180 - mae: 84.934 - mean_q: -111.918 Interval 8556 (4277500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1081 6 episodes - episode_reward: -178.687 [-328.112, 37.867] - loss: 10.570 - mae: 84.939 - mean_q: -111.926 Interval 8557 (4278000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7758 7 episodes - episode_reward: -189.939 [-225.720, -124.788] - loss: 12.392 - mae: 84.955 - mean_q: -111.950 Interval 8558 (4278500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8725 8 episodes - episode_reward: -183.605 [-243.039, -143.689] - loss: 9.766 - mae: 84.985 - mean_q: -111.964 Interval 8559 (4279000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0832 9 episodes - episode_reward: -170.577 [-288.439, -120.491] - loss: 10.186 - mae: 84.986 - mean_q: -111.966 Interval 8560 (4279500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8912 8 episodes - episode_reward: -176.161 [-228.726, -70.736] - loss: 7.860 - mae: 84.944 - mean_q: -111.979 Interval 8561 (4280000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1547 7 episodes - episode_reward: -233.319 [-502.546, -160.309] - loss: 14.614 - mae: 84.950 - mean_q: -111.962 Interval 8562 (4280500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7801 7 episodes - episode_reward: -194.400 [-238.236, -122.122] - loss: 11.854 - mae: 84.928 - mean_q: -111.956 Interval 8563 (4281000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9685 8 episodes - episode_reward: -192.551 [-249.702, -143.433] - loss: 11.870 - mae: 84.894 - mean_q: -111.943 Interval 8564 (4281500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5256 6 episodes - episode_reward: -203.954 [-295.061, -145.666] - loss: 9.385 - mae: 84.859 - mean_q: -111.953 Interval 8565 (4282000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1015 9 episodes - episode_reward: -167.805 [-256.936, -100.000] - loss: 9.169 - mae: 84.851 - mean_q: -111.972 Interval 8566 (4282500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5880 9 episodes - episode_reward: -154.906 [-219.697, -42.264] - loss: 9.083 - mae: 84.841 - mean_q: -111.988 Interval 8567 (4283000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3979 11 episodes - episode_reward: -148.150 [-274.092, -26.020] - loss: 13.180 - mae: 84.850 - mean_q: -111.985 Interval 8568 (4283500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8572 8 episodes - episode_reward: -182.181 [-414.930, 0.997] - loss: 12.521 - mae: 84.842 - mean_q: -111.971 Interval 8569 (4284000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1883 10 episodes - episode_reward: -156.355 [-231.445, -100.000] - loss: 12.162 - mae: 84.828 - mean_q: -111.964 Interval 8570 (4284500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8645 8 episodes - episode_reward: -171.671 [-266.991, -108.696] - loss: 11.193 - mae: 84.821 - mean_q: -111.975 Interval 8571 (4285000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7383 9 episodes - episode_reward: -168.222 [-310.770, -5.861] - loss: 11.416 - mae: 84.820 - mean_q: -111.967 Interval 8572 (4285500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0721 6 episodes - episode_reward: -245.878 [-301.685, -189.140] - loss: 11.017 - mae: 84.822 - mean_q: -111.967 Interval 8573 (4286000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6408 7 episodes - episode_reward: -178.057 [-333.179, -134.891] - loss: 9.756 - mae: 84.811 - mean_q: -111.984 Interval 8574 (4286500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1679 9 episodes - episode_reward: -175.360 [-247.652, -122.088] - loss: 14.449 - mae: 84.839 - mean_q: -111.997 Interval 8575 (4287000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9559 7 episodes - episode_reward: -211.474 [-283.149, -143.258] - loss: 11.842 - mae: 84.819 - mean_q: -111.972 Interval 8576 (4287500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8030 8 episodes - episode_reward: -186.754 [-272.684, -129.750] - loss: 12.218 - mae: 84.811 - mean_q: -111.970 Interval 8577 (4288000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2166 7 episodes - episode_reward: -224.763 [-351.594, -139.548] - loss: 11.836 - mae: 84.781 - mean_q: -111.975 Interval 8578 (4288500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5522 11 episodes - episode_reward: -164.537 [-259.289, -100.000] - loss: 12.055 - mae: 84.743 - mean_q: -111.973 Interval 8579 (4289000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7466 9 episodes - episode_reward: -150.919 [-291.525, -33.050] - loss: 11.952 - mae: 84.712 - mean_q: -111.986 Interval 8580 (4289500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7400 8 episodes - episode_reward: -179.191 [-270.677, -124.550] - loss: 10.216 - mae: 84.666 - mean_q: -111.999 Interval 8581 (4290000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5201 6 episodes - episode_reward: -173.213 [-206.858, -152.577] - loss: 12.554 - mae: 84.642 - mean_q: -112.016 Interval 8582 (4290500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2457 7 episodes - episode_reward: -169.928 [-325.926, -25.772] - loss: 12.776 - mae: 84.636 - mean_q: -112.023 Interval 8583 (4291000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.7828 8 episodes - episode_reward: -309.737 [-569.756, -123.342] - loss: 11.431 - mae: 84.613 - mean_q: -111.989 Interval 8584 (4291500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4221 3 episodes - episode_reward: -743.596 [-1302.186, -411.108] - loss: 12.082 - mae: 84.610 - mean_q: -111.979 Interval 8585 (4292000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7734 7 episodes - episode_reward: -197.685 [-264.234, -61.521] - loss: 9.793 - mae: 84.627 - mean_q: -111.996 Interval 8586 (4292500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.0944 7 episodes - episode_reward: -147.560 [-215.497, 0.591] - loss: 13.358 - mae: 84.653 - mean_q: -111.994 Interval 8587 (4293000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7229 10 episodes - episode_reward: -187.973 [-324.836, -100.000] - loss: 13.261 - mae: 84.669 - mean_q: -111.990 Interval 8588 (4293500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4737 7 episodes - episode_reward: -176.660 [-373.514, 39.481] - loss: 9.122 - mae: 84.654 - mean_q: -111.990 Interval 8589 (4294000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9069 9 episodes - episode_reward: -154.643 [-188.379, -112.520] - loss: 12.402 - mae: 84.686 - mean_q: -112.000 Interval 8590 (4294500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9342 9 episodes - episode_reward: -164.990 [-214.994, -100.000] - loss: 15.000 - mae: 84.707 - mean_q: -112.010 Interval 8591 (4295000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8013 8 episodes - episode_reward: -175.473 [-264.965, -108.047] - loss: 13.314 - mae: 84.719 - mean_q: -112.013 Interval 8592 (4295500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9157 7 episodes - episode_reward: -203.663 [-265.308, -162.319] - loss: 12.984 - mae: 84.713 - mean_q: -111.988 Interval 8593 (4296000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4505 10 episodes - episode_reward: -173.824 [-252.381, -100.000] - loss: 10.991 - mae: 84.701 - mean_q: -112.001 Interval 8594 (4296500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1018 8 episodes - episode_reward: -192.756 [-241.654, -137.192] - loss: 13.744 - mae: 84.730 - mean_q: -111.998 Interval 8595 (4297000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9996 10 episodes - episode_reward: -152.125 [-193.826, -88.607] - loss: 13.864 - mae: 84.712 - mean_q: -111.971 Interval 8596 (4297500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3311 8 episodes - episode_reward: -150.731 [-182.630, -109.317] - loss: 9.703 - mae: 84.687 - mean_q: -111.988 Interval 8597 (4298000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5850 8 episodes - episode_reward: -161.387 [-222.715, -4.899] - loss: 10.912 - mae: 84.704 - mean_q: -112.030 Interval 8598 (4298500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9503 7 episodes - episode_reward: -203.317 [-298.672, -114.626] - loss: 13.171 - mae: 84.704 - mean_q: -112.023 Interval 8599 (4299000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5232 7 episodes - episode_reward: -187.674 [-242.613, -89.197] - loss: 13.942 - mae: 84.712 - mean_q: -112.022 Interval 8600 (4299500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5658 8 episodes - episode_reward: -158.897 [-236.011, -63.108] - loss: 13.709 - mae: 84.723 - mean_q: -112.012 Interval 8601 (4300000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5853 7 episodes - episode_reward: -180.460 [-302.936, -121.855] - loss: 13.244 - mae: 84.730 - mean_q: -111.998 Interval 8602 (4300500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3907 9 episodes - episode_reward: -188.791 [-239.252, -136.680] - loss: 14.797 - mae: 84.737 - mean_q: -111.969 Interval 8603 (4301000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7595 7 episodes - episode_reward: -198.025 [-236.081, -134.303] - loss: 12.110 - mae: 84.723 - mean_q: -111.951 Interval 8604 (4301500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9984 8 episodes - episode_reward: -187.723 [-221.112, -148.311] - loss: 14.469 - mae: 84.744 - mean_q: -111.942 Interval 8605 (4302000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1352 7 episodes - episode_reward: -156.947 [-225.360, 7.088] - loss: 13.782 - mae: 84.721 - mean_q: -111.917 Interval 8606 (4302500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3620 6 episodes - episode_reward: -194.266 [-248.528, -103.157] - loss: 10.851 - mae: 84.699 - mean_q: -111.936 Interval 8607 (4303000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1960 10 episodes - episode_reward: -159.349 [-210.176, -100.000] - loss: 13.039 - mae: 84.715 - mean_q: -111.936 Interval 8608 (4303500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7745 7 episodes - episode_reward: -193.779 [-249.401, -142.301] - loss: 14.911 - mae: 84.725 - mean_q: -111.924 Interval 8609 (4304000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1989 9 episodes - episode_reward: -169.029 [-277.622, -100.000] - loss: 10.939 - mae: 84.706 - mean_q: -111.939 Interval 8610 (4304500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9152 9 episodes - episode_reward: -174.860 [-252.798, -80.623] - loss: 14.663 - mae: 84.726 - mean_q: -111.923 Interval 8611 (4305000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9840 6 episodes - episode_reward: -242.201 [-453.517, -145.109] - loss: 11.299 - mae: 84.689 - mean_q: -111.909 Interval 8612 (4305500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7128 7 episodes - episode_reward: -193.695 [-240.796, -130.600] - loss: 9.321 - mae: 84.663 - mean_q: -111.944 Interval 8613 (4306000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4100 7 episodes - episode_reward: -172.462 [-325.845, -35.516] - loss: 16.504 - mae: 84.708 - mean_q: -111.963 Interval 8614 (4306500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4599 9 episodes - episode_reward: -191.285 [-275.129, -100.000] - loss: 13.128 - mae: 84.691 - mean_q: -111.946 Interval 8615 (4307000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4585 7 episodes - episode_reward: -179.591 [-242.099, -141.756] - loss: 12.190 - mae: 84.682 - mean_q: -111.936 Interval 8616 (4307500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0700 7 episodes - episode_reward: -218.194 [-283.965, -155.713] - loss: 14.444 - mae: 84.699 - mean_q: -111.945 Interval 8617 (4308000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7624 11 episodes - episode_reward: -174.112 [-248.795, -100.000] - loss: 12.649 - mae: 84.701 - mean_q: -111.944 Interval 8618 (4308500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7362 8 episodes - episode_reward: -171.888 [-200.260, -149.676] - loss: 11.089 - mae: 84.682 - mean_q: -111.955 Interval 8619 (4309000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0031 8 episodes - episode_reward: -184.969 [-208.386, -154.340] - loss: 16.070 - mae: 84.691 - mean_q: -111.954 Interval 8620 (4309500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8413 6 episodes - episode_reward: -131.323 [-205.212, 18.123] - loss: 11.107 - mae: 84.647 - mean_q: -111.943 Interval 8621 (4310000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3584 8 episodes - episode_reward: -224.753 [-325.871, -158.557] - loss: 10.189 - mae: 84.621 - mean_q: -111.944 Interval 8622 (4310500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8057 7 episodes - episode_reward: -196.958 [-303.401, -128.072] - loss: 13.206 - mae: 84.617 - mean_q: -111.952 Interval 8623 (4311000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3496 10 episodes - episode_reward: -161.852 [-216.381, -100.000] - loss: 13.486 - mae: 84.613 - mean_q: -111.938 Interval 8624 (4311500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6309 7 episodes - episode_reward: -195.356 [-239.580, -155.514] - loss: 12.543 - mae: 84.581 - mean_q: -111.920 Interval 8625 (4312000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0543 9 episodes - episode_reward: -165.044 [-220.351, -100.000] - loss: 13.040 - mae: 84.573 - mean_q: -111.916 Interval 8626 (4312500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0551 8 episodes - episode_reward: -193.073 [-382.662, -148.459] - loss: 13.205 - mae: 84.567 - mean_q: -111.906 Interval 8627 (4313000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1423 9 episodes - episode_reward: -181.454 [-253.180, -100.000] - loss: 12.702 - mae: 84.536 - mean_q: -111.917 Interval 8628 (4313500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2554 7 episodes - episode_reward: -151.202 [-219.283, -28.489] - loss: 10.453 - mae: 84.534 - mean_q: -111.919 Interval 8629 (4314000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5055 7 episodes - episode_reward: -182.944 [-300.155, -94.939] - loss: 13.504 - mae: 84.524 - mean_q: -111.920 Interval 8630 (4314500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5892 11 episodes - episode_reward: -167.274 [-278.360, -122.497] - loss: 14.230 - mae: 84.520 - mean_q: -111.897 Interval 8631 (4315000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2357 10 episodes - episode_reward: -160.685 [-239.608, -107.314] - loss: 10.315 - mae: 84.500 - mean_q: -111.890 Interval 8632 (4315500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8370 8 episodes - episode_reward: -174.639 [-221.548, -133.647] - loss: 13.010 - mae: 84.505 - mean_q: -111.883 Interval 8633 (4316000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8592 8 episodes - episode_reward: -185.634 [-243.786, -133.135] - loss: 14.998 - mae: 84.516 - mean_q: -111.875 Interval 8634 (4316500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9798 8 episodes - episode_reward: -179.767 [-257.179, -61.222] - loss: 13.802 - mae: 84.483 - mean_q: -111.850 Interval 8635 (4317000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8420 7 episodes - episode_reward: -202.990 [-372.601, -100.000] - loss: 11.982 - mae: 84.464 - mean_q: -111.841 Interval 8636 (4317500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8394 8 episodes - episode_reward: -183.750 [-272.221, -100.000] - loss: 10.168 - mae: 84.445 - mean_q: -111.853 Interval 8637 (4318000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9777 7 episodes - episode_reward: -203.826 [-311.311, -98.021] - loss: 14.163 - mae: 84.461 - mean_q: -111.862 Interval 8638 (4318500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3724 6 episodes - episode_reward: -196.975 [-221.914, -182.204] - loss: 11.699 - mae: 84.463 - mean_q: -111.863 Interval 8639 (4319000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8534 9 episodes - episode_reward: -164.522 [-240.125, -101.531] - loss: 10.205 - mae: 84.455 - mean_q: -111.868 Interval 8640 (4319500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4246 7 episodes - episode_reward: -161.083 [-246.341, -105.205] - loss: 8.352 - mae: 84.442 - mean_q: -111.894 Interval 8641 (4320000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5828 9 episodes - episode_reward: -206.176 [-313.853, -147.015] - loss: 14.774 - mae: 84.480 - mean_q: -111.909 Interval 8642 (4320500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8874 8 episodes - episode_reward: -247.855 [-483.909, -145.235] - loss: 11.408 - mae: 84.449 - mean_q: -111.902 Interval 8643 (4321000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4012 9 episodes - episode_reward: -297.579 [-800.657, -100.669] - loss: 11.827 - mae: 84.473 - mean_q: -111.941 Interval 8644 (4321500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8344 6 episodes - episode_reward: -230.177 [-315.238, -167.128] - loss: 14.734 - mae: 84.503 - mean_q: -111.947 Interval 8645 (4322000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4889 8 episodes - episode_reward: -218.878 [-376.234, -124.238] - loss: 11.625 - mae: 84.503 - mean_q: -111.965 Interval 8646 (4322500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8629 9 episodes - episode_reward: -162.332 [-231.414, -108.446] - loss: 13.704 - mae: 84.522 - mean_q: -111.990 Interval 8647 (4323000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3638 6 episodes - episode_reward: -181.686 [-266.437, -56.944] - loss: 7.839 - mae: 84.522 - mean_q: -112.011 Interval 8648 (4323500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7550 7 episodes - episode_reward: -194.152 [-352.648, -96.022] - loss: 10.728 - mae: 84.568 - mean_q: -112.022 Interval 8649 (4324000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5341 9 episodes - episode_reward: -149.009 [-226.843, -11.665] - loss: 16.668 - mae: 84.616 - mean_q: -112.031 Interval 8650 (4324500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4003 8 episodes - episode_reward: -152.381 [-238.951, -13.494] - loss: 12.896 - mae: 84.618 - mean_q: -112.018 Interval 8651 (4325000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4380 6 episodes - episode_reward: -193.847 [-256.096, -145.721] - loss: 10.046 - mae: 84.612 - mean_q: -112.029 Interval 8652 (4325500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9211 11 episodes - episode_reward: -140.945 [-189.092, -82.985] - loss: 14.621 - mae: 84.630 - mean_q: -112.030 Interval 8653 (4326000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3575 7 episodes - episode_reward: -168.179 [-179.051, -142.778] - loss: 15.809 - mae: 84.641 - mean_q: -111.999 Interval 8654 (4326500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0847 6 episodes - episode_reward: -162.101 [-199.547, -82.708] - loss: 14.529 - mae: 84.640 - mean_q: -111.979 Interval 8655 (4327000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7837 9 episodes - episode_reward: -157.951 [-237.207, -82.944] - loss: 12.974 - mae: 84.643 - mean_q: -111.965 Interval 8656 (4327500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2012 7 episodes - episode_reward: -158.803 [-227.722, -28.419] - loss: 11.628 - mae: 84.625 - mean_q: -111.971 Interval 8657 (4328000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9330 7 episodes - episode_reward: -213.976 [-353.551, -139.177] - loss: 11.465 - mae: 84.626 - mean_q: -111.974 Interval 8658 (4328500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.4185 7 episodes - episode_reward: -170.832 [-206.835, -144.899] - loss: 13.244 - mae: 84.630 - mean_q: -111.988 Interval 8659 (4329000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0998 9 episodes - episode_reward: -171.166 [-236.708, -122.335] - loss: 11.741 - mae: 84.621 - mean_q: -111.972 Interval 8660 (4329500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0920 9 episodes - episode_reward: -174.097 [-211.344, -75.849] - loss: 10.047 - mae: 84.604 - mean_q: -111.986 Interval 8661 (4330000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4179 7 episodes - episode_reward: -162.626 [-229.953, 30.647] - loss: 12.560 - mae: 84.603 - mean_q: -111.999 Interval 8662 (4330500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9878 8 episodes - episode_reward: -185.830 [-267.118, -132.210] - loss: 9.301 - mae: 84.585 - mean_q: -111.998 Interval 8663 (4331000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.3182 7 episodes - episode_reward: -391.266 [-730.222, -186.385] - loss: 11.472 - mae: 84.606 - mean_q: -112.007 Interval 8664 (4331500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9492 8 episodes - episode_reward: -169.957 [-239.010, -100.000] - loss: 12.370 - mae: 84.630 - mean_q: -112.013 Interval 8665 (4332000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9986 9 episodes - episode_reward: -181.726 [-237.444, -100.000] - loss: 13.073 - mae: 84.671 - mean_q: -112.006 Interval 8666 (4332500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6214 7 episodes - episode_reward: -179.659 [-262.574, -100.000] - loss: 9.824 - mae: 84.672 - mean_q: -112.022 Interval 8667 (4333000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.0332 7 episodes - episode_reward: -136.793 [-196.019, 3.299] - loss: 11.506 - mae: 84.703 - mean_q: -112.042 Interval 8668 (4333500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6933 8 episodes - episode_reward: -177.709 [-291.802, -35.839] - loss: 10.522 - mae: 84.716 - mean_q: -112.066 Interval 8669 (4334000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6254 7 episodes - episode_reward: -177.942 [-216.868, -149.977] - loss: 12.175 - mae: 84.741 - mean_q: -112.069 Interval 8670 (4334500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3989 9 episodes - episode_reward: -189.216 [-276.515, -154.793] - loss: 12.161 - mae: 84.728 - mean_q: -112.082 Interval 8671 (4335000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5352 7 episodes - episode_reward: -184.092 [-224.701, -143.744] - loss: 9.267 - mae: 84.750 - mean_q: -112.107 Interval 8672 (4335500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7921 7 episodes - episode_reward: -205.550 [-249.205, -137.582] - loss: 12.647 - mae: 84.752 - mean_q: -112.115 Interval 8673 (4336000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8376 8 episodes - episode_reward: -166.778 [-194.077, -140.832] - loss: 11.733 - mae: 84.758 - mean_q: -112.130 Interval 8674 (4336500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6887 7 episodes - episode_reward: -196.111 [-286.439, -125.990] - loss: 10.081 - mae: 84.775 - mean_q: -112.140 Interval 8675 (4337000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6133 8 episodes - episode_reward: -168.675 [-266.201, -113.524] - loss: 10.688 - mae: 84.791 - mean_q: -112.170 Interval 8676 (4337500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1447 11 episodes - episode_reward: -137.166 [-193.312, -23.307] - loss: 13.893 - mae: 84.804 - mean_q: -112.182 Interval 8677 (4338000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2645 6 episodes - episode_reward: -267.820 [-625.891, -129.859] - loss: 17.566 - mae: 84.831 - mean_q: -112.141 Interval 8678 (4338500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0743 8 episodes - episode_reward: -138.104 [-258.408, 13.887] - loss: 12.002 - mae: 84.825 - mean_q: -112.095 Interval 8679 (4339000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9394 9 episodes - episode_reward: -168.382 [-233.118, -100.000] - loss: 14.518 - mae: 84.846 - mean_q: -112.071 Interval 8680 (4339500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1117 8 episodes - episode_reward: -194.505 [-254.683, -118.895] - loss: 10.059 - mae: 84.819 - mean_q: -112.060 Interval 8681 (4340000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4900 6 episodes - episode_reward: -200.876 [-266.018, -137.674] - loss: 8.194 - mae: 84.810 - mean_q: -112.074 Interval 8682 (4340500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2221 8 episodes - episode_reward: -197.591 [-288.175, -125.172] - loss: 15.583 - mae: 84.854 - mean_q: -112.098 Interval 8683 (4341000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6133 9 episodes - episode_reward: -150.361 [-241.725, 2.884] - loss: 12.769 - mae: 84.819 - mean_q: -112.077 Interval 8684 (4341500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3090 7 episodes - episode_reward: -238.115 [-506.439, -152.868] - loss: 9.247 - mae: 84.787 - mean_q: -112.075 Interval 8685 (4342000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -5.8919 7 episodes - episode_reward: -418.983 [-557.715, -166.592] - loss: 12.965 - mae: 84.777 - mean_q: -112.011 Interval 8686 (4342500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8793 7 episodes - episode_reward: -272.289 [-635.926, 16.528] - loss: 14.304 - mae: 84.808 - mean_q: -112.022 Interval 8687 (4343000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0001 8 episodes - episode_reward: -187.662 [-243.198, -140.997] - loss: 13.223 - mae: 84.814 - mean_q: -112.017 Interval 8688 (4343500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3803 9 episodes - episode_reward: -138.783 [-201.170, 6.218] - loss: 12.879 - mae: 84.812 - mean_q: -112.031 Interval 8689 (4344000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9112 8 episodes - episode_reward: -180.837 [-233.143, -122.377] - loss: 11.716 - mae: 84.814 - mean_q: -112.002 Interval 8690 (4344500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6614 7 episodes - episode_reward: -188.666 [-231.506, -147.264] - loss: 10.572 - mae: 84.811 - mean_q: -112.000 Interval 8691 (4345000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2101 7 episodes - episode_reward: -219.622 [-360.261, -127.228] - loss: 10.786 - mae: 84.804 - mean_q: -112.028 Interval 8692 (4345500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3859 7 episodes - episode_reward: -172.536 [-198.536, -142.493] - loss: 11.829 - mae: 84.826 - mean_q: -112.001 Interval 8693 (4346000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8956 7 episodes - episode_reward: -217.382 [-389.865, -145.276] - loss: 17.060 - mae: 84.872 - mean_q: -111.993 Interval 8694 (4346500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3310 6 episodes - episode_reward: -181.407 [-263.210, -120.398] - loss: 11.363 - mae: 84.844 - mean_q: -111.967 Interval 8695 (4347000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8870 7 episodes - episode_reward: -208.767 [-276.267, -157.430] - loss: 9.687 - mae: 84.839 - mean_q: -111.988 Interval 8696 (4347500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7628 7 episodes - episode_reward: -206.861 [-256.680, -150.500] - loss: 10.197 - mae: 84.848 - mean_q: -112.022 Interval 8697 (4348000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1289 8 episodes - episode_reward: -130.085 [-205.611, 33.777] - loss: 12.674 - mae: 84.872 - mean_q: -112.022 Interval 8698 (4348500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0511 8 episodes - episode_reward: -189.752 [-270.139, -136.436] - loss: 13.157 - mae: 84.862 - mean_q: -112.015 Interval 8699 (4349000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4054 8 episodes - episode_reward: -207.340 [-286.799, -134.836] - loss: 10.209 - mae: 84.850 - mean_q: -112.020 Interval 8700 (4349500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9081 9 episodes - episode_reward: -165.713 [-256.220, -46.713] - loss: 13.965 - mae: 84.877 - mean_q: -112.025 Interval 8701 (4350000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5454 7 episodes - episode_reward: -178.307 [-225.778, -90.148] - loss: 7.833 - mae: 84.846 - mean_q: -112.035 Interval 8702 (4350500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4062 8 episodes - episode_reward: -219.323 [-365.896, -145.165] - loss: 8.441 - mae: 84.861 - mean_q: -112.099 Interval 8703 (4351000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4906 6 episodes - episode_reward: -187.250 [-230.480, -157.842] - loss: 10.660 - mae: 84.885 - mean_q: -112.138 Interval 8704 (4351500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0383 8 episodes - episode_reward: -199.056 [-277.148, -100.000] - loss: 12.585 - mae: 84.913 - mean_q: -112.141 Interval 8705 (4352000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8963 8 episodes - episode_reward: -187.591 [-249.229, -103.348] - loss: 14.291 - mae: 84.944 - mean_q: -112.138 Interval 8706 (4352500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0547 7 episodes - episode_reward: -209.243 [-245.930, -168.202] - loss: 14.035 - mae: 84.956 - mean_q: -112.115 Interval 8707 (4353000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3827 7 episodes - episode_reward: -169.153 [-212.475, -46.130] - loss: 11.160 - mae: 84.920 - mean_q: -112.120 Interval 8708 (4353500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7359 6 episodes - episode_reward: -232.333 [-263.347, -188.232] - loss: 13.721 - mae: 84.929 - mean_q: -112.172 Interval 8709 (4354000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0335 9 episodes - episode_reward: -172.321 [-232.997, -118.996] - loss: 10.986 - mae: 84.932 - mean_q: -112.174 Interval 8710 (4354500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8577 7 episodes - episode_reward: -204.941 [-265.270, -128.803] - loss: 10.383 - mae: 84.947 - mean_q: -112.192 Interval 8711 (4355000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9563 7 episodes - episode_reward: -200.130 [-410.172, -135.552] - loss: 13.631 - mae: 84.967 - mean_q: -112.218 Interval 8712 (4355500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3238 9 episodes - episode_reward: -186.651 [-230.601, -99.923] - loss: 11.375 - mae: 84.960 - mean_q: -112.212 Interval 8713 (4356000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8087 8 episodes - episode_reward: -177.675 [-241.806, -137.165] - loss: 11.203 - mae: 84.955 - mean_q: -112.218 Interval 8714 (4356500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9300 7 episodes - episode_reward: -209.487 [-325.101, -149.221] - loss: 12.286 - mae: 84.960 - mean_q: -112.220 Interval 8715 (4357000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7140 7 episodes - episode_reward: -201.009 [-317.296, -151.365] - loss: 12.754 - mae: 84.984 - mean_q: -112.208 Interval 8716 (4357500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5305 10 episodes - episode_reward: -172.774 [-233.975, -108.282] - loss: 11.297 - mae: 84.980 - mean_q: -112.216 Interval 8717 (4358000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9701 8 episodes - episode_reward: -182.083 [-210.042, -136.599] - loss: 10.192 - mae: 84.980 - mean_q: -112.227 Interval 8718 (4358500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5002 9 episodes - episode_reward: -197.472 [-226.891, -147.303] - loss: 10.826 - mae: 84.981 - mean_q: -112.256 Interval 8719 (4359000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1873 9 episodes - episode_reward: -172.158 [-244.580, -100.000] - loss: 13.979 - mae: 85.008 - mean_q: -112.289 Interval 8720 (4359500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0466 9 episodes - episode_reward: -177.338 [-296.960, -100.000] - loss: 10.066 - mae: 85.000 - mean_q: -112.276 Interval 8721 (4360000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7015 7 episodes - episode_reward: -189.702 [-242.004, -109.198] - loss: 14.108 - mae: 85.018 - mean_q: -112.270 Interval 8722 (4360500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9720 8 episodes - episode_reward: -189.967 [-248.331, -134.922] - loss: 12.378 - mae: 85.022 - mean_q: -112.265 Interval 8723 (4361000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2700 6 episodes - episode_reward: -177.569 [-236.722, -143.621] - loss: 11.033 - mae: 85.028 - mean_q: -112.277 Interval 8724 (4361500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3428 10 episodes - episode_reward: -168.184 [-228.248, -100.000] - loss: 12.223 - mae: 85.040 - mean_q: -112.284 Interval 8725 (4362000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7432 7 episodes - episode_reward: -195.020 [-240.850, -119.397] - loss: 10.836 - mae: 85.058 - mean_q: -112.296 Interval 8726 (4362500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9430 8 episodes - episode_reward: -180.976 [-232.169, -122.202] - loss: 9.989 - mae: 85.054 - mean_q: -112.299 Interval 8727 (4363000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7161 8 episodes - episode_reward: -176.944 [-267.805, -100.000] - loss: 10.328 - mae: 85.050 - mean_q: -112.309 Interval 8728 (4363500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4111 7 episodes - episode_reward: -179.009 [-270.109, -132.287] - loss: 8.232 - mae: 85.050 - mean_q: -112.343 Interval 8729 (4364000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0392 9 episodes - episode_reward: -166.411 [-220.598, -126.557] - loss: 10.335 - mae: 85.068 - mean_q: -112.371 Interval 8730 (4364500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6836 7 episodes - episode_reward: -186.602 [-273.112, -129.402] - loss: 10.876 - mae: 85.061 - mean_q: -112.374 Interval 8731 (4365000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2381 9 episodes - episode_reward: -183.710 [-269.708, -121.171] - loss: 12.838 - mae: 85.064 - mean_q: -112.367 Interval 8732 (4365500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3625 8 episodes - episode_reward: -145.667 [-209.926, -34.868] - loss: 10.730 - mae: 85.052 - mean_q: -112.381 Interval 8733 (4366000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7544 7 episodes - episode_reward: -191.038 [-224.546, -173.962] - loss: 12.849 - mae: 85.066 - mean_q: -112.375 Interval 8734 (4366500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5295 10 episodes - episode_reward: -181.224 [-300.979, -113.846] - loss: 8.660 - mae: 85.063 - mean_q: -112.385 Interval 8735 (4367000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3547 7 episodes - episode_reward: -171.116 [-196.052, -130.196] - loss: 11.370 - mae: 85.097 - mean_q: -112.388 Interval 8736 (4367500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0013 6 episodes - episode_reward: -160.601 [-240.557, -51.938] - loss: 9.957 - mae: 85.085 - mean_q: -112.387 Interval 8737 (4368000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1842 7 episodes - episode_reward: -223.736 [-333.782, -153.828] - loss: 12.038 - mae: 85.094 - mean_q: -112.393 Interval 8738 (4368500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1442 7 episodes - episode_reward: -151.193 [-216.173, -37.147] - loss: 9.844 - mae: 85.113 - mean_q: -112.412 Interval 8739 (4369000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8802 7 episodes - episode_reward: -204.555 [-330.417, 11.990] - loss: 12.148 - mae: 85.132 - mean_q: -112.425 Interval 8740 (4369500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2195 10 episodes - episode_reward: -168.075 [-224.683, -100.000] - loss: 15.950 - mae: 85.155 - mean_q: -112.419 Interval 8741 (4370000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4413 8 episodes - episode_reward: -153.389 [-270.499, 38.062] - loss: 10.793 - mae: 85.131 - mean_q: -112.425 Interval 8742 (4370500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3246 6 episodes - episode_reward: -179.510 [-284.101, -109.756] - loss: 12.395 - mae: 85.129 - mean_q: -112.414 Interval 8743 (4371000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7672 7 episodes - episode_reward: -199.107 [-264.475, -145.335] - loss: 12.617 - mae: 85.102 - mean_q: -112.399 Interval 8744 (4371500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2638 9 episodes - episode_reward: -172.132 [-273.568, -122.603] - loss: 11.232 - mae: 85.094 - mean_q: -112.394 Interval 8745 (4372000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6470 7 episodes - episode_reward: -204.376 [-318.109, -121.398] - loss: 8.743 - mae: 85.061 - mean_q: -112.388 Interval 8746 (4372500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4389 6 episodes - episode_reward: -196.614 [-285.534, -111.965] - loss: 10.514 - mae: 85.073 - mean_q: -112.388 Interval 8747 (4373000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6446 7 episodes - episode_reward: -199.094 [-269.678, -154.228] - loss: 10.604 - mae: 85.065 - mean_q: -112.400 Interval 8748 (4373500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1315 8 episodes - episode_reward: -192.023 [-225.638, -163.367] - loss: 9.798 - mae: 85.050 - mean_q: -112.411 Interval 8749 (4374000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6348 9 episodes - episode_reward: -149.295 [-219.787, -109.842] - loss: 10.954 - mae: 85.046 - mean_q: -112.406 Interval 8750 (4374500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0951 9 episodes - episode_reward: -171.158 [-231.519, -113.124] - loss: 12.578 - mae: 85.041 - mean_q: -112.384 Interval 8751 (4375000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0602 8 episodes - episode_reward: -189.037 [-235.905, -161.211] - loss: 8.157 - mae: 85.010 - mean_q: -112.392 Interval 8752 (4375500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1558 9 episodes - episode_reward: -174.941 [-223.837, -105.524] - loss: 9.637 - mae: 85.006 - mean_q: -112.410 Interval 8753 (4376000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7721 6 episodes - episode_reward: -226.551 [-309.944, -143.379] - loss: 8.240 - mae: 85.006 - mean_q: -112.420 Interval 8754 (4376500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1277 7 episodes - episode_reward: -226.529 [-348.539, -151.095] - loss: 7.082 - mae: 85.004 - mean_q: -112.448 Interval 8755 (4377000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0796 6 episodes - episode_reward: -228.487 [-279.007, -175.453] - loss: 10.226 - mae: 85.032 - mean_q: -112.476 Interval 8756 (4377500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5330 7 episodes - episode_reward: -212.298 [-348.370, -146.031] - loss: 8.649 - mae: 85.027 - mean_q: -112.493 Interval 8757 (4378000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1804 8 episodes - episode_reward: -196.094 [-340.167, -112.015] - loss: 9.266 - mae: 85.037 - mean_q: -112.525 Interval 8758 (4378500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7473 8 episodes - episode_reward: -173.092 [-326.103, -54.047] - loss: 10.294 - mae: 85.030 - mean_q: -112.561 Interval 8759 (4379000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3295 9 episodes - episode_reward: -180.067 [-234.690, -100.000] - loss: 14.731 - mae: 85.073 - mean_q: -112.562 Interval 8760 (4379500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8957 8 episodes - episode_reward: -178.370 [-239.938, -95.252] - loss: 12.834 - mae: 85.066 - mean_q: -112.535 Interval 8761 (4380000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3768 8 episodes - episode_reward: -220.696 [-375.191, -120.711] - loss: 9.267 - mae: 85.057 - mean_q: -112.550 Interval 8762 (4380500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3785 7 episodes - episode_reward: -160.043 [-205.267, -100.000] - loss: 12.382 - mae: 85.065 - mean_q: -112.557 Interval 8763 (4381000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6294 8 episodes - episode_reward: -172.492 [-228.027, -56.531] - loss: 9.509 - mae: 85.035 - mean_q: -112.576 Interval 8764 (4381500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2773 9 episodes - episode_reward: -170.190 [-215.803, -123.358] - loss: 11.010 - mae: 85.033 - mean_q: -112.590 Interval 8765 (4382000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8052 10 episodes - episode_reward: -150.546 [-241.180, -85.769] - loss: 8.416 - mae: 85.022 - mean_q: -112.603 Interval 8766 (4382500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2943 7 episodes - episode_reward: -168.411 [-240.479, 3.943] - loss: 10.065 - mae: 85.036 - mean_q: -112.641 Interval 8767 (4383000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6726 7 episodes - episode_reward: -184.029 [-283.709, -100.000] - loss: 10.766 - mae: 85.033 - mean_q: -112.646 Interval 8768 (4383500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3333 9 episodes - episode_reward: -188.818 [-354.081, -100.000] - loss: 9.484 - mae: 85.027 - mean_q: -112.662 Interval 8769 (4384000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7095 6 episodes - episode_reward: -215.408 [-247.018, -167.705] - loss: 9.908 - mae: 85.050 - mean_q: -112.697 Interval 8770 (4384500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8583 8 episodes - episode_reward: -177.928 [-241.353, -116.133] - loss: 11.236 - mae: 85.060 - mean_q: -112.723 Interval 8771 (4385000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7632 7 episodes - episode_reward: -196.115 [-251.396, -141.712] - loss: 12.384 - mae: 85.069 - mean_q: -112.725 Interval 8772 (4385500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0908 8 episodes - episode_reward: -262.706 [-582.356, -113.930] - loss: 11.808 - mae: 85.063 - mean_q: -112.729 Interval 8773 (4386000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8490 9 episodes - episode_reward: -153.289 [-288.400, 16.554] - loss: 13.206 - mae: 85.072 - mean_q: -112.738 Interval 8774 (4386500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0527 9 episodes - episode_reward: -175.641 [-370.554, -111.562] - loss: 8.428 - mae: 85.068 - mean_q: -112.754 Interval 8775 (4387000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4851 7 episodes - episode_reward: -179.266 [-205.891, -142.772] - loss: 9.844 - mae: 85.098 - mean_q: -112.772 Interval 8776 (4387500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8628 8 episodes - episode_reward: -179.695 [-226.406, -129.687] - loss: 10.588 - mae: 85.118 - mean_q: -112.780 Interval 8777 (4388000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4678 7 episodes - episode_reward: -176.189 [-213.843, -141.184] - loss: 11.136 - mae: 85.136 - mean_q: -112.765 Interval 8778 (4388500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.1845 7 episodes - episode_reward: -230.068 [-457.524, -83.844] - loss: 7.783 - mae: 85.116 - mean_q: -112.795 Interval 8779 (4389000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.7761 8 episodes - episode_reward: -479.448 [-734.667, -200.547] - loss: 12.129 - mae: 85.178 - mean_q: -112.827 Interval 8780 (4389500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4164 9 episodes - episode_reward: -190.338 [-227.939, -124.796] - loss: 8.962 - mae: 85.204 - mean_q: -112.819 Interval 8781 (4390000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3648 9 episodes - episode_reward: -179.421 [-343.521, -39.535] - loss: 9.725 - mae: 85.247 - mean_q: -112.842 Interval 8782 (4390500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2084 8 episodes - episode_reward: -211.585 [-366.104, -100.000] - loss: 12.708 - mae: 85.286 - mean_q: -112.858 Interval 8783 (4391000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5093 8 episodes - episode_reward: -151.955 [-240.097, 0.964] - loss: 9.274 - mae: 85.297 - mean_q: -112.871 Interval 8784 (4391500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8291 5 episodes - episode_reward: -261.823 [-332.074, -211.985] - loss: 12.876 - mae: 85.321 - mean_q: -112.884 Interval 8785 (4392000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5928 8 episodes - episode_reward: -175.784 [-289.578, -3.015] - loss: 10.512 - mae: 85.310 - mean_q: -112.917 Interval 8786 (4392500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8998 8 episodes - episode_reward: -183.048 [-223.493, -158.253] - loss: 10.570 - mae: 85.283 - mean_q: -112.952 Interval 8787 (4393000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6042 5 episodes - episode_reward: -361.097 [-727.650, -144.848] - loss: 10.051 - mae: 85.268 - mean_q: -112.958 Interval 8788 (4393500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.3362 5 episodes - episode_reward: -538.661 [-785.243, -201.775] - loss: 9.347 - mae: 85.280 - mean_q: -112.955 Interval 8789 (4394000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8146 6 episodes - episode_reward: -223.021 [-711.961, 4.430] - loss: 11.581 - mae: 85.331 - mean_q: -112.997 Interval 8790 (4394500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3975 10 episodes - episode_reward: -173.077 [-266.464, -77.847] - loss: 11.343 - mae: 85.378 - mean_q: -113.011 Interval 8791 (4395000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9150 9 episodes - episode_reward: -155.906 [-218.547, -109.667] - loss: 8.273 - mae: 85.407 - mean_q: -113.040 Interval 8792 (4395500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9627 6 episodes - episode_reward: -156.314 [-224.265, 13.136] - loss: 10.920 - mae: 85.453 - mean_q: -113.092 Interval 8793 (4396000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.5551 9 episodes - episode_reward: -205.576 [-302.257, -129.748] - loss: 12.680 - mae: 85.492 - mean_q: -113.110 Interval 8794 (4396500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4451 7 episodes - episode_reward: -180.425 [-262.132, -71.016] - loss: 12.502 - mae: 85.499 - mean_q: -113.133 Interval 8795 (4397000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8105 7 episodes - episode_reward: -200.551 [-234.257, -162.388] - loss: 10.793 - mae: 85.526 - mean_q: -113.152 Interval 8796 (4397500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9504 8 episodes - episode_reward: -184.133 [-238.070, -115.698] - loss: 10.607 - mae: 85.568 - mean_q: -113.166 Interval 8797 (4398000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0784 8 episodes - episode_reward: -189.494 [-252.961, -127.834] - loss: 11.733 - mae: 85.583 - mean_q: -113.201 Interval 8798 (4398500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6489 7 episodes - episode_reward: -195.305 [-271.076, -98.580] - loss: 9.981 - mae: 85.595 - mean_q: -113.221 Interval 8799 (4399000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7695 7 episodes - episode_reward: -188.927 [-289.456, -125.837] - loss: 9.691 - mae: 85.622 - mean_q: -113.263 Interval 8800 (4399500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7629 9 episodes - episode_reward: -210.401 [-291.681, -120.307] - loss: 11.216 - mae: 85.641 - mean_q: -113.303 Interval 8801 (4400000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2453 9 episodes - episode_reward: -177.126 [-216.295, -128.811] - loss: 14.986 - mae: 85.675 - mean_q: -113.299 Interval 8802 (4400500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4955 8 episodes - episode_reward: -157.261 [-230.788, -46.392] - loss: 13.200 - mae: 85.680 - mean_q: -113.282 Interval 8803 (4401000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3540 9 episodes - episode_reward: -192.841 [-285.079, -100.000] - loss: 11.705 - mae: 85.690 - mean_q: -113.275 Interval 8804 (4401500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6684 6 episodes - episode_reward: -219.210 [-269.309, -176.971] - loss: 10.394 - mae: 85.699 - mean_q: -113.299 Interval 8805 (4402000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3642 9 episodes - episode_reward: -189.005 [-279.122, -111.013] - loss: 11.273 - mae: 85.722 - mean_q: -113.306 Interval 8806 (4402500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7282 6 episodes - episode_reward: -136.377 [-233.112, 34.954] - loss: 12.306 - mae: 85.749 - mean_q: -113.294 Interval 8807 (4403000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3404 10 episodes - episode_reward: -170.492 [-228.254, -121.548] - loss: 10.636 - mae: 85.766 - mean_q: -113.306 Interval 8808 (4403500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1511 6 episodes - episode_reward: -172.050 [-263.291, -109.811] - loss: 10.164 - mae: 85.779 - mean_q: -113.292 Interval 8809 (4404000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0927 9 episodes - episode_reward: -176.526 [-242.176, -100.000] - loss: 8.672 - mae: 85.779 - mean_q: -113.306 Interval 8810 (4404500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1506 10 episodes - episode_reward: -152.844 [-244.918, -92.277] - loss: 10.916 - mae: 85.810 - mean_q: -113.316 Interval 8811 (4405000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.4351 7 episodes - episode_reward: -182.403 [-314.071, -114.719] - loss: 9.134 - mae: 85.822 - mean_q: -113.331 Interval 8812 (4405500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7388 7 episodes - episode_reward: -193.786 [-256.072, -153.850] - loss: 15.898 - mae: 85.869 - mean_q: -113.314 Interval 8813 (4406000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2639 6 episodes - episode_reward: -258.500 [-313.346, -178.487] - loss: 13.964 - mae: 85.849 - mean_q: -113.295 Interval 8814 (4406500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6248 10 episodes - episode_reward: -182.024 [-273.716, -116.021] - loss: 12.250 - mae: 85.847 - mean_q: -113.297 Interval 8815 (4407000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2092 9 episodes - episode_reward: -191.292 [-329.610, -115.643] - loss: 9.675 - mae: 85.846 - mean_q: -113.312 Interval 8816 (4407500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9556 7 episodes - episode_reward: -204.737 [-275.674, -154.634] - loss: 14.838 - mae: 85.888 - mean_q: -113.308 Interval 8817 (4408000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0776 7 episodes - episode_reward: -212.724 [-379.697, -129.639] - loss: 12.038 - mae: 85.897 - mean_q: -113.287 Interval 8818 (4408500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1183 9 episodes - episode_reward: -167.939 [-242.978, -100.000] - loss: 15.903 - mae: 85.905 - mean_q: -113.289 Interval 8819 (4409000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9581 8 episodes - episode_reward: -199.055 [-264.155, -81.506] - loss: 10.596 - mae: 85.880 - mean_q: -113.288 Interval 8820 (4409500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.9610 6 episodes - episode_reward: -159.402 [-243.462, -47.957] - loss: 10.401 - mae: 85.895 - mean_q: -113.314 Interval 8821 (4410000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0978 6 episodes - episode_reward: -246.382 [-494.879, -156.006] - loss: 10.389 - mae: 85.892 - mean_q: -113.325 Interval 8822 (4410500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9227 8 episodes - episode_reward: -197.752 [-343.102, -100.000] - loss: 12.888 - mae: 85.910 - mean_q: -113.322 Interval 8823 (4411000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3101 7 episodes - episode_reward: -156.930 [-207.881, -100.000] - loss: 10.904 - mae: 85.917 - mean_q: -113.320 Interval 8824 (4411500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9181 9 episodes - episode_reward: -166.532 [-238.231, -100.000] - loss: 13.431 - mae: 85.926 - mean_q: -113.312 Interval 8825 (4412000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5093 6 episodes - episode_reward: -203.154 [-281.635, -139.820] - loss: 14.115 - mae: 85.920 - mean_q: -113.292 Interval 8826 (4412500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4141 7 episodes - episode_reward: -167.225 [-241.409, -86.476] - loss: 13.095 - mae: 85.930 - mean_q: -113.274 Interval 8827 (4413000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3823 7 episodes - episode_reward: -179.510 [-298.433, 34.866] - loss: 7.620 - mae: 85.914 - mean_q: -113.292 Interval 8828 (4413500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4941 7 episodes - episode_reward: -167.490 [-204.059, -111.445] - loss: 16.807 - mae: 85.956 - mean_q: -113.288 Interval 8829 (4414000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4010 8 episodes - episode_reward: -157.962 [-220.001, -107.562] - loss: 8.812 - mae: 85.920 - mean_q: -113.302 Interval 8830 (4414500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9209 8 episodes - episode_reward: -182.294 [-260.230, -140.083] - loss: 11.663 - mae: 85.937 - mean_q: -113.315 Interval 8831 (4415000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9863 9 episodes - episode_reward: -163.742 [-198.560, -110.785] - loss: 15.211 - mae: 85.924 - mean_q: -113.288 Interval 8832 (4415500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6121 8 episodes - episode_reward: -158.333 [-190.385, -122.142] - loss: 11.691 - mae: 85.899 - mean_q: -113.307 Interval 8833 (4416000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0179 9 episodes - episode_reward: -170.093 [-227.073, -74.740] - loss: 13.595 - mae: 85.904 - mean_q: -113.337 Interval 8834 (4416500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0091 8 episodes - episode_reward: -187.393 [-311.113, -117.379] - loss: 12.705 - mae: 85.904 - mean_q: -113.328 Interval 8835 (4417000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2398 9 episodes - episode_reward: -185.636 [-233.026, -100.000] - loss: 12.655 - mae: 85.917 - mean_q: -113.328 Interval 8836 (4417500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9833 7 episodes - episode_reward: -134.896 [-229.150, 21.860] - loss: 13.055 - mae: 85.928 - mean_q: -113.324 Interval 8837 (4418000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0784 8 episodes - episode_reward: -190.186 [-232.149, -124.265] - loss: 12.068 - mae: 85.923 - mean_q: -113.303 Interval 8838 (4418500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6521 7 episodes - episode_reward: -187.630 [-260.189, -146.374] - loss: 10.003 - mae: 85.932 - mean_q: -113.309 Interval 8839 (4419000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7745 9 episodes - episode_reward: -159.667 [-284.347, -30.750] - loss: 14.119 - mae: 85.966 - mean_q: -113.306 Interval 8840 (4419500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7347 8 episodes - episode_reward: -171.591 [-267.326, -114.280] - loss: 10.426 - mae: 85.946 - mean_q: -113.304 Interval 8841 (4420000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8667 8 episodes - episode_reward: -174.458 [-299.873, -100.000] - loss: 12.503 - mae: 85.957 - mean_q: -113.306 Interval 8842 (4420500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6746 8 episodes - episode_reward: -173.931 [-283.943, -4.443] - loss: 12.902 - mae: 85.957 - mean_q: -113.300 Interval 8843 (4421000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0920 6 episodes - episode_reward: -178.027 [-230.085, -128.040] - loss: 12.606 - mae: 85.963 - mean_q: -113.281 Interval 8844 (4421500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7463 8 episodes - episode_reward: -166.986 [-209.973, -112.575] - loss: 10.338 - mae: 85.954 - mean_q: -113.256 Interval 8845 (4422000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2025 8 episodes - episode_reward: -205.413 [-269.434, -150.256] - loss: 11.034 - mae: 85.948 - mean_q: -113.266 Interval 8846 (4422500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4359 7 episodes - episode_reward: -162.582 [-235.654, -119.347] - loss: 12.792 - mae: 85.951 - mean_q: -113.255 Interval 8847 (4423000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6401 7 episodes - episode_reward: -192.705 [-254.729, -144.485] - loss: 8.670 - mae: 85.924 - mean_q: -113.263 Interval 8848 (4423500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6718 9 episodes - episode_reward: -149.995 [-201.973, -100.048] - loss: 10.069 - mae: 85.937 - mean_q: -113.297 Interval 8849 (4424000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8890 8 episodes - episode_reward: -175.657 [-246.931, -129.510] - loss: 10.887 - mae: 85.930 - mean_q: -113.311 Interval 8850 (4424500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5748 9 episodes - episode_reward: -203.645 [-256.902, -136.351] - loss: 10.197 - mae: 85.942 - mean_q: -113.306 Interval 8851 (4425000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7487 8 episodes - episode_reward: -167.266 [-220.948, -100.000] - loss: 11.975 - mae: 85.939 - mean_q: -113.321 Interval 8852 (4425500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.9930 7 episodes - episode_reward: -144.562 [-265.263, 0.937] - loss: 14.666 - mae: 85.957 - mean_q: -113.300 Interval 8853 (4426000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0553 6 episodes - episode_reward: -172.098 [-227.604, -142.752] - loss: 9.200 - mae: 85.940 - mean_q: -113.293 Interval 8854 (4426500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6783 7 episodes - episode_reward: -194.157 [-382.924, 3.750] - loss: 14.395 - mae: 85.942 - mean_q: -113.266 Interval 8855 (4427000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7703 7 episodes - episode_reward: -195.735 [-281.164, -169.015] - loss: 13.151 - mae: 85.929 - mean_q: -113.258 Interval 8856 (4427500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.8360 6 episodes - episode_reward: -144.049 [-194.301, -4.820] - loss: 9.926 - mae: 85.906 - mean_q: -113.249 Interval 8857 (4428000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9933 7 episodes - episode_reward: -220.012 [-390.145, -170.344] - loss: 12.749 - mae: 85.923 - mean_q: -113.238 Interval 8858 (4428500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7454 9 episodes - episode_reward: -155.467 [-220.335, -66.937] - loss: 10.610 - mae: 85.898 - mean_q: -113.230 Interval 8859 (4429000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8431 8 episodes - episode_reward: -176.004 [-292.029, -100.000] - loss: 11.134 - mae: 85.883 - mean_q: -113.229 Interval 8860 (4429500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7360 7 episodes - episode_reward: -195.176 [-242.685, -141.645] - loss: 10.405 - mae: 85.874 - mean_q: -113.231 Interval 8861 (4430000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6420 9 episodes - episode_reward: -146.988 [-250.497, 47.110] - loss: 12.673 - mae: 85.892 - mean_q: -113.212 Interval 8862 (4430500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9085 6 episodes - episode_reward: -161.352 [-247.359, 1.252] - loss: 14.826 - mae: 85.880 - mean_q: -113.170 Interval 8863 (4431000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1534 7 episodes - episode_reward: -141.877 [-255.215, 30.055] - loss: 13.509 - mae: 85.857 - mean_q: -113.135 Interval 8864 (4431500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9405 7 episodes - episode_reward: -221.760 [-318.229, -130.124] - loss: 14.710 - mae: 85.859 - mean_q: -113.106 Interval 8865 (4432000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8383 8 episodes - episode_reward: -175.266 [-222.127, -100.000] - loss: 11.652 - mae: 85.840 - mean_q: -113.065 Interval 8866 (4432500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3992 7 episodes - episode_reward: -164.239 [-240.728, -72.270] - loss: 8.868 - mae: 85.831 - mean_q: -113.062 Interval 8867 (4433000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9588 10 episodes - episode_reward: -196.168 [-463.903, -100.000] - loss: 12.312 - mae: 85.835 - mean_q: -113.067 Interval 8868 (4433500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7533 7 episodes - episode_reward: -200.785 [-287.716, -148.196] - loss: 16.900 - mae: 85.846 - mean_q: -113.034 Interval 8869 (4434000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0277 8 episodes - episode_reward: -185.268 [-353.978, -120.088] - loss: 10.098 - mae: 85.806 - mean_q: -113.004 Interval 8870 (4434500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4382 7 episodes - episode_reward: -169.555 [-229.775, -92.551] - loss: 9.855 - mae: 85.803 - mean_q: -113.026 Interval 8871 (4435000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9397 8 episodes - episode_reward: -183.776 [-244.780, -140.286] - loss: 11.600 - mae: 85.794 - mean_q: -112.997 Interval 8872 (4435500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4114 8 episodes - episode_reward: -222.689 [-371.492, -149.589] - loss: 13.033 - mae: 85.788 - mean_q: -113.001 Interval 8873 (4436000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1085 6 episodes - episode_reward: -179.233 [-236.961, -134.805] - loss: 11.619 - mae: 85.760 - mean_q: -112.974 Interval 8874 (4436500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.6016 9 episodes - episode_reward: -201.921 [-295.059, -102.708] - loss: 13.036 - mae: 85.756 - mean_q: -112.965 Interval 8875 (4437000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2064 11 episodes - episode_reward: -145.056 [-198.949, -100.000] - loss: 14.160 - mae: 85.732 - mean_q: -112.943 Interval 8876 (4437500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9151 9 episodes - episode_reward: -162.720 [-257.367, -62.588] - loss: 9.876 - mae: 85.702 - mean_q: -112.931 Interval 8877 (4438000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3344 7 episodes - episode_reward: -159.960 [-300.112, -26.909] - loss: 12.315 - mae: 85.689 - mean_q: -112.943 Interval 8878 (4438500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0975 8 episodes - episode_reward: -138.474 [-187.532, -53.029] - loss: 13.406 - mae: 85.672 - mean_q: -112.933 Interval 8879 (4439000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1959 6 episodes - episode_reward: -161.199 [-230.833, -112.161] - loss: 11.331 - mae: 85.630 - mean_q: -112.921 Interval 8880 (4439500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6111 7 episodes - episode_reward: -194.034 [-225.248, -127.596] - loss: 15.421 - mae: 85.599 - mean_q: -112.873 Interval 8881 (4440000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2567 8 episodes - episode_reward: -206.607 [-283.759, -126.424] - loss: 9.675 - mae: 85.526 - mean_q: -112.858 Interval 8882 (4440500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8099 7 episodes - episode_reward: -201.659 [-248.019, -159.309] - loss: 11.803 - mae: 85.497 - mean_q: -112.832 Interval 8883 (4441000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2408 7 episodes - episode_reward: -160.806 [-212.935, -45.735] - loss: 15.546 - mae: 85.458 - mean_q: -112.801 Interval 8884 (4441500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6792 8 episodes - episode_reward: -172.125 [-283.901, -100.570] - loss: 12.262 - mae: 85.409 - mean_q: -112.748 Interval 8885 (4442000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6289 9 episodes - episode_reward: -201.434 [-268.268, -138.954] - loss: 10.191 - mae: 85.378 - mean_q: -112.720 Interval 8886 (4442500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5253 5 episodes - episode_reward: -143.945 [-205.964, -80.646] - loss: 10.026 - mae: 85.330 - mean_q: -112.702 Interval 8887 (4443000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7658 7 episodes - episode_reward: -195.242 [-320.349, -103.363] - loss: 12.336 - mae: 85.297 - mean_q: -112.672 Interval 8888 (4443500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1241 8 episodes - episode_reward: -202.618 [-287.822, -162.165] - loss: 12.298 - mae: 85.236 - mean_q: -112.624 Interval 8889 (4444000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6100 7 episodes - episode_reward: -167.362 [-234.079, -87.005] - loss: 10.223 - mae: 85.143 - mean_q: -112.580 Interval 8890 (4444500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4470 9 episodes - episode_reward: -204.965 [-317.069, -111.596] - loss: 11.119 - mae: 85.096 - mean_q: -112.563 Interval 8891 (4445000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6173 7 episodes - episode_reward: -167.972 [-212.630, -121.656] - loss: 11.674 - mae: 85.022 - mean_q: -112.513 Interval 8892 (4445500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.5395 8 episodes - episode_reward: -237.492 [-311.593, -163.790] - loss: 9.633 - mae: 84.936 - mean_q: -112.491 Interval 8893 (4446000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9446 8 episodes - episode_reward: -307.061 [-844.436, -100.000] - loss: 7.987 - mae: 84.883 - mean_q: -112.474 Interval 8894 (4446500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.0785 3 episodes - episode_reward: -945.815 [-1553.421, -458.385] - loss: 9.342 - mae: 84.862 - mean_q: -112.423 Interval 8895 (4447000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1592 8 episodes - episode_reward: -219.107 [-384.810, -132.073] - loss: 11.083 - mae: 84.886 - mean_q: -112.433 Interval 8896 (4447500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6348 7 episodes - episode_reward: -188.799 [-390.219, -126.207] - loss: 9.481 - mae: 84.910 - mean_q: -112.412 Interval 8897 (4448000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5502 10 episodes - episode_reward: -181.937 [-334.476, -100.000] - loss: 8.683 - mae: 84.923 - mean_q: -112.384 Interval 8898 (4448500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1491 7 episodes - episode_reward: -221.682 [-383.318, -149.982] - loss: 7.745 - mae: 84.901 - mean_q: -112.384 Interval 8899 (4449000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7036 6 episodes - episode_reward: -146.003 [-236.318, -105.759] - loss: 9.378 - mae: 84.902 - mean_q: -112.394 Interval 8900 (4449500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5399 7 episodes - episode_reward: -181.041 [-241.704, -130.339] - loss: 9.691 - mae: 84.900 - mean_q: -112.423 Interval 8901 (4450000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8500 8 episodes - episode_reward: -169.583 [-242.297, -100.000] - loss: 7.623 - mae: 84.908 - mean_q: -112.422 Interval 8902 (4450500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9260 9 episodes - episode_reward: -171.055 [-262.258, -85.671] - loss: 10.141 - mae: 84.920 - mean_q: -112.447 Interval 8903 (4451000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9430 7 episodes - episode_reward: -211.610 [-322.672, -137.230] - loss: 10.254 - mae: 84.933 - mean_q: -112.421 Interval 8904 (4451500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2283 8 episodes - episode_reward: -191.399 [-284.062, -134.192] - loss: 10.964 - mae: 84.926 - mean_q: -112.411 Interval 8905 (4452000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1869 9 episodes - episode_reward: -186.432 [-226.427, -130.638] - loss: 10.891 - mae: 84.927 - mean_q: -112.364 Interval 8906 (4452500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3322 11 episodes - episode_reward: -143.887 [-227.652, -100.905] - loss: 11.270 - mae: 84.924 - mean_q: -112.341 Interval 8907 (4453000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0957 7 episodes - episode_reward: -228.031 [-326.028, -137.478] - loss: 10.949 - mae: 84.923 - mean_q: -112.320 Interval 8908 (4453500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2897 8 episodes - episode_reward: -201.155 [-241.377, -145.054] - loss: 11.362 - mae: 84.931 - mean_q: -112.309 Interval 8909 (4454000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9116 7 episodes - episode_reward: -202.542 [-307.146, -100.907] - loss: 12.911 - mae: 84.923 - mean_q: -112.295 Interval 8910 (4454500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7842 8 episodes - episode_reward: -188.097 [-302.518, -114.988] - loss: 9.191 - mae: 84.899 - mean_q: -112.267 Interval 8911 (4455000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0242 7 episodes - episode_reward: -212.642 [-329.346, -153.537] - loss: 9.239 - mae: 84.895 - mean_q: -112.302 Interval 8912 (4455500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7795 6 episodes - episode_reward: -217.511 [-283.496, -159.825] - loss: 10.703 - mae: 84.893 - mean_q: -112.300 Interval 8913 (4456000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7505 8 episodes - episode_reward: -177.204 [-230.347, -100.000] - loss: 9.664 - mae: 84.868 - mean_q: -112.285 Interval 8914 (4456500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3761 10 episodes - episode_reward: -173.307 [-205.118, -133.906] - loss: 7.879 - mae: 84.849 - mean_q: -112.285 Interval 8915 (4457000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8746 7 episodes - episode_reward: -136.054 [-212.623, -11.875] - loss: 10.710 - mae: 84.839 - mean_q: -112.286 Interval 8916 (4457500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7530 7 episodes - episode_reward: -182.643 [-234.037, -115.387] - loss: 13.499 - mae: 84.857 - mean_q: -112.241 Interval 8917 (4458000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1104 9 episodes - episode_reward: -182.030 [-284.971, -10.315] - loss: 12.721 - mae: 84.838 - mean_q: -112.179 Interval 8918 (4458500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9122 8 episodes - episode_reward: -182.868 [-238.128, -102.722] - loss: 13.036 - mae: 84.829 - mean_q: -112.145 Interval 8919 (4459000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3510 9 episodes - episode_reward: -178.351 [-217.321, -138.262] - loss: 7.763 - mae: 84.792 - mean_q: -112.142 Interval 8920 (4459500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6501 10 episodes - episode_reward: -183.399 [-352.478, -99.887] - loss: 8.302 - mae: 84.780 - mean_q: -112.162 Interval 8921 (4460000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6121 8 episodes - episode_reward: -166.440 [-226.123, -110.894] - loss: 7.134 - mae: 84.769 - mean_q: -112.155 Interval 8922 (4460500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6637 7 episodes - episode_reward: -188.991 [-268.789, -142.953] - loss: 12.760 - mae: 84.785 - mean_q: -112.126 Interval 8923 (4461000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2616 9 episodes - episode_reward: -185.846 [-247.289, -86.636] - loss: 11.689 - mae: 84.771 - mean_q: -112.115 Interval 8924 (4461500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9454 7 episodes - episode_reward: -204.550 [-271.582, -116.969] - loss: 9.426 - mae: 84.754 - mean_q: -112.077 Interval 8925 (4462000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7399 7 episodes - episode_reward: -195.642 [-242.360, -86.132] - loss: 7.708 - mae: 84.730 - mean_q: -112.075 Interval 8926 (4462500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3116 8 episodes - episode_reward: -213.182 [-261.947, -133.556] - loss: 11.288 - mae: 84.734 - mean_q: -112.061 Interval 8927 (4463000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4070 9 episodes - episode_reward: -184.460 [-229.956, -110.432] - loss: 12.684 - mae: 84.728 - mean_q: -112.033 Interval 8928 (4463500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7325 9 episodes - episode_reward: -154.651 [-205.792, -64.565] - loss: 13.895 - mae: 84.710 - mean_q: -112.008 Interval 8929 (4464000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4091 8 episodes - episode_reward: -213.587 [-297.757, -100.000] - loss: 11.658 - mae: 84.698 - mean_q: -111.986 Interval 8930 (4464500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0770 9 episodes - episode_reward: -169.224 [-262.802, -3.175] - loss: 8.982 - mae: 84.679 - mean_q: -111.974 Interval 8931 (4465000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4455 8 episodes - episode_reward: -212.195 [-317.344, -139.997] - loss: 10.569 - mae: 84.672 - mean_q: -111.984 Interval 8932 (4465500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1990 8 episodes - episode_reward: -135.755 [-229.466, 19.079] - loss: 12.167 - mae: 84.670 - mean_q: -111.992 Interval 8933 (4466000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2958 9 episodes - episode_reward: -191.875 [-283.011, -135.420] - loss: 9.215 - mae: 84.652 - mean_q: -111.980 Interval 8934 (4466500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4451 10 episodes - episode_reward: -167.473 [-233.441, -123.561] - loss: 10.163 - mae: 84.638 - mean_q: -111.992 Interval 8935 (4467000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2097 10 episodes - episode_reward: -161.600 [-242.917, -90.928] - loss: 10.608 - mae: 84.647 - mean_q: -111.983 Interval 8936 (4467500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4708 7 episodes - episode_reward: -178.185 [-276.450, -137.463] - loss: 11.103 - mae: 84.633 - mean_q: -111.976 Interval 8937 (4468000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8910 8 episodes - episode_reward: -185.334 [-257.714, -34.860] - loss: 10.870 - mae: 84.623 - mean_q: -111.983 Interval 8938 (4468500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3401 8 episodes - episode_reward: -201.476 [-280.045, -160.422] - loss: 10.541 - mae: 84.616 - mean_q: -112.010 Interval 8939 (4469000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8081 8 episodes - episode_reward: -176.827 [-234.862, -98.335] - loss: 8.479 - mae: 84.616 - mean_q: -112.017 Interval 8940 (4469500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1656 10 episodes - episode_reward: -160.667 [-282.967, -100.000] - loss: 9.968 - mae: 84.623 - mean_q: -112.006 Interval 8941 (4470000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9934 8 episodes - episode_reward: -179.936 [-300.436, -100.000] - loss: 10.242 - mae: 84.615 - mean_q: -112.006 Interval 8942 (4470500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6529 8 episodes - episode_reward: -171.966 [-214.137, -113.719] - loss: 10.866 - mae: 84.627 - mean_q: -112.005 Interval 8943 (4471000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9069 11 episodes - episode_reward: -173.206 [-234.142, -100.000] - loss: 11.384 - mae: 84.636 - mean_q: -111.974 Interval 8944 (4471500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4050 8 episodes - episode_reward: -215.664 [-256.470, -152.043] - loss: 10.523 - mae: 84.635 - mean_q: -111.986 Interval 8945 (4472000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2019 9 episodes - episode_reward: -180.705 [-241.587, -111.412] - loss: 10.434 - mae: 84.642 - mean_q: -111.986 Interval 8946 (4472500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0215 7 episodes - episode_reward: -201.135 [-233.680, -157.235] - loss: 12.655 - mae: 84.656 - mean_q: -111.988 Interval 8947 (4473000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2059 7 episodes - episode_reward: -167.233 [-234.857, -102.082] - loss: 7.164 - mae: 84.620 - mean_q: -112.004 Interval 8948 (4473500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8848 9 episodes - episode_reward: -162.412 [-214.709, -74.960] - loss: 9.714 - mae: 84.639 - mean_q: -112.033 Interval 8949 (4474000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1937 7 episodes - episode_reward: -225.612 [-421.204, -138.660] - loss: 10.399 - mae: 84.655 - mean_q: -112.055 Interval 8950 (4474500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9596 6 episodes - episode_reward: -165.972 [-204.725, -143.201] - loss: 11.460 - mae: 84.662 - mean_q: -112.059 Interval 8951 (4475000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3834 9 episodes - episode_reward: -190.304 [-256.151, -109.111] - loss: 10.986 - mae: 84.688 - mean_q: -112.044 Interval 8952 (4475500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5588 6 episodes - episode_reward: -206.116 [-277.231, -143.883] - loss: 8.639 - mae: 84.678 - mean_q: -112.062 Interval 8953 (4476000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9845 9 episodes - episode_reward: -171.449 [-251.886, -115.142] - loss: 10.723 - mae: 84.702 - mean_q: -112.092 Interval 8954 (4476500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5296 7 episodes - episode_reward: -171.562 [-237.239, -67.507] - loss: 9.675 - mae: 84.709 - mean_q: -112.096 Interval 8955 (4477000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9805 8 episodes - episode_reward: -188.677 [-226.807, -149.496] - loss: 11.064 - mae: 84.731 - mean_q: -112.105 Interval 8956 (4477500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9638 8 episodes - episode_reward: -182.837 [-282.831, -100.000] - loss: 13.149 - mae: 84.720 - mean_q: -112.129 Interval 8957 (4478000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9067 7 episodes - episode_reward: -205.052 [-293.350, -161.456] - loss: 9.415 - mae: 84.698 - mean_q: -112.154 Interval 8958 (4478500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.5385 8 episodes - episode_reward: -246.833 [-462.665, -9.301] - loss: 10.517 - mae: 84.704 - mean_q: -112.187 Interval 8959 (4479000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.5310 9 episodes - episode_reward: -345.264 [-874.121, -100.000] - loss: 10.371 - mae: 84.722 - mean_q: -112.192 Interval 8960 (4479500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4325 8 episodes - episode_reward: -154.856 [-236.434, 9.669] - loss: 9.384 - mae: 84.768 - mean_q: -112.228 Interval 8961 (4480000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1282 7 episodes - episode_reward: -152.376 [-219.715, 1.125] - loss: 10.145 - mae: 84.821 - mean_q: -112.249 Interval 8962 (4480500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9119 7 episodes - episode_reward: -202.270 [-302.785, -145.225] - loss: 11.004 - mae: 84.851 - mean_q: -112.261 Interval 8963 (4481000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4604 7 episodes - episode_reward: -181.685 [-263.043, -37.364] - loss: 8.821 - mae: 84.889 - mean_q: -112.304 Interval 8964 (4481500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7789 8 episodes - episode_reward: -163.129 [-221.587, -122.361] - loss: 13.008 - mae: 84.950 - mean_q: -112.311 Interval 8965 (4482000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7814 9 episodes - episode_reward: -161.647 [-247.178, -100.000] - loss: 8.991 - mae: 84.948 - mean_q: -112.334 Interval 8966 (4482500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2882 10 episodes - episode_reward: -164.814 [-270.431, -100.000] - loss: 9.106 - mae: 84.966 - mean_q: -112.364 Interval 8967 (4483000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3951 7 episodes - episode_reward: -168.028 [-236.064, -129.114] - loss: 9.562 - mae: 84.995 - mean_q: -112.386 Interval 8968 (4483500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0535 8 episodes - episode_reward: -189.704 [-289.215, -112.624] - loss: 8.552 - mae: 85.005 - mean_q: -112.410 Interval 8969 (4484000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3406 8 episodes - episode_reward: -208.159 [-314.196, -138.859] - loss: 8.423 - mae: 85.047 - mean_q: -112.445 Interval 8970 (4484500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0696 6 episodes - episode_reward: -174.717 [-218.014, -146.611] - loss: 11.844 - mae: 85.081 - mean_q: -112.468 Interval 8971 (4485000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4246 7 episodes - episode_reward: -179.012 [-256.845, -27.400] - loss: 11.497 - mae: 85.101 - mean_q: -112.465 Interval 8972 (4485500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6507 9 episodes - episode_reward: -146.581 [-221.959, -53.772] - loss: 12.415 - mae: 85.116 - mean_q: -112.486 Interval 8973 (4486000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4266 7 episodes - episode_reward: -172.654 [-253.263, -6.236] - loss: 8.123 - mae: 85.144 - mean_q: -112.511 Interval 8974 (4486500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7578 9 episodes - episode_reward: -153.279 [-198.088, -92.727] - loss: 10.979 - mae: 85.190 - mean_q: -112.535 Interval 8975 (4487000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9854 8 episodes - episode_reward: -179.183 [-266.175, -100.000] - loss: 7.328 - mae: 85.192 - mean_q: -112.542 Interval 8976 (4487500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9149 9 episodes - episode_reward: -166.584 [-225.211, -100.000] - loss: 9.533 - mae: 85.224 - mean_q: -112.576 Interval 8977 (4488000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8940 8 episodes - episode_reward: -182.645 [-290.243, -137.507] - loss: 9.518 - mae: 85.240 - mean_q: -112.578 Interval 8978 (4488500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5915 7 episodes - episode_reward: -168.776 [-207.199, -129.834] - loss: 9.013 - mae: 85.258 - mean_q: -112.602 Interval 8979 (4489000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5693 9 episodes - episode_reward: -157.541 [-382.472, 14.758] - loss: 8.624 - mae: 85.268 - mean_q: -112.613 Interval 8980 (4489500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4625 7 episodes - episode_reward: -176.540 [-264.011, -125.245] - loss: 10.472 - mae: 85.275 - mean_q: -112.658 Interval 8981 (4490000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.5563 5 episodes - episode_reward: -145.109 [-186.841, -103.198] - loss: 12.932 - mae: 85.324 - mean_q: -112.656 Interval 8982 (4490500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9488 9 episodes - episode_reward: -160.782 [-223.554, -117.884] - loss: 9.323 - mae: 85.319 - mean_q: -112.669 Interval 8983 (4491000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3394 8 episodes - episode_reward: -151.772 [-232.630, 9.870] - loss: 11.082 - mae: 85.333 - mean_q: -112.684 Interval 8984 (4491500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3003 7 episodes - episode_reward: -158.052 [-247.534, -51.244] - loss: 9.366 - mae: 85.336 - mean_q: -112.689 Interval 8985 (4492000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9366 8 episodes - episode_reward: -185.045 [-250.850, -100.000] - loss: 11.634 - mae: 85.361 - mean_q: -112.681 Interval 8986 (4492500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7816 8 episodes - episode_reward: -176.413 [-211.603, -146.307] - loss: 8.851 - mae: 85.376 - mean_q: -112.679 Interval 8987 (4493000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0152 8 episodes - episode_reward: -189.564 [-303.321, -136.583] - loss: 11.172 - mae: 85.391 - mean_q: -112.681 Interval 8988 (4493500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1456 7 episodes - episode_reward: -148.896 [-216.083, -52.065] - loss: 9.849 - mae: 85.377 - mean_q: -112.645 Interval 8989 (4494000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5964 8 episodes - episode_reward: -158.990 [-221.958, -128.263] - loss: 8.725 - mae: 85.378 - mean_q: -112.661 Interval 8990 (4494500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0920 9 episodes - episode_reward: -170.164 [-259.481, -91.186] - loss: 9.436 - mae: 85.390 - mean_q: -112.678 Interval 8991 (4495000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6529 7 episodes - episode_reward: -197.674 [-312.499, -24.365] - loss: 9.750 - mae: 85.410 - mean_q: -112.668 Interval 8992 (4495500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7188 8 episodes - episode_reward: -170.220 [-208.809, -110.444] - loss: 9.117 - mae: 85.395 - mean_q: -112.685 Interval 8993 (4496000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3252 7 episodes - episode_reward: -163.931 [-185.800, -142.797] - loss: 9.999 - mae: 85.374 - mean_q: -112.684 Interval 8994 (4496500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6835 8 episodes - episode_reward: -177.171 [-282.154, -122.490] - loss: 7.435 - mae: 85.332 - mean_q: -112.691 Interval 8995 (4497000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2143 7 episodes - episode_reward: -227.109 [-454.769, -107.449] - loss: 10.697 - mae: 85.306 - mean_q: -112.682 Interval 8996 (4497500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4295 7 episodes - episode_reward: -162.188 [-273.340, -22.282] - loss: 14.226 - mae: 85.267 - mean_q: -112.642 Interval 8997 (4498000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7228 8 episodes - episode_reward: -173.175 [-279.881, -77.396] - loss: 9.373 - mae: 85.207 - mean_q: -112.628 Interval 8998 (4498500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0939 8 episodes - episode_reward: -309.858 [-619.971, -100.000] - loss: 10.522 - mae: 85.197 - mean_q: -112.556 Interval 8999 (4499000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7288 8 episodes - episode_reward: -351.382 [-627.866, -188.217] - loss: 11.374 - mae: 85.178 - mean_q: -112.483 Interval 9000 (4499500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3753 8 episodes - episode_reward: -225.903 [-324.732, -124.177] - loss: 9.357 - mae: 85.192 - mean_q: -112.518 Interval 9001 (4500000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6954 7 episodes - episode_reward: -190.434 [-265.305, -118.181] - loss: 7.557 - mae: 85.191 - mean_q: -112.533 Interval 9002 (4500500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8677 8 episodes - episode_reward: -188.301 [-248.781, -146.301] - loss: 7.877 - mae: 85.210 - mean_q: -112.556 Interval 9003 (4501000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6218 8 episodes - episode_reward: -156.761 [-205.481, -45.862] - loss: 10.501 - mae: 85.235 - mean_q: -112.548 Interval 9004 (4501500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2912 9 episodes - episode_reward: -190.957 [-266.139, -149.874] - loss: 8.816 - mae: 85.224 - mean_q: -112.531 Interval 9005 (4502000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4306 7 episodes - episode_reward: -170.543 [-253.983, -42.198] - loss: 11.257 - mae: 85.232 - mean_q: -112.506 Interval 9006 (4502500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5778 8 episodes - episode_reward: -157.105 [-235.210, -28.729] - loss: 10.456 - mae: 85.214 - mean_q: -112.486 Interval 9007 (4503000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5463 8 episodes - episode_reward: -163.670 [-270.756, -38.880] - loss: 9.375 - mae: 85.221 - mean_q: -112.487 Interval 9008 (4503500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4793 8 episodes - episode_reward: -211.193 [-289.178, -126.102] - loss: 11.987 - mae: 85.212 - mean_q: -112.472 Interval 9009 (4504000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8904 9 episodes - episode_reward: -161.660 [-221.565, -68.967] - loss: 9.112 - mae: 85.205 - mean_q: -112.444 Interval 9010 (4504500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8835 8 episodes - episode_reward: -180.763 [-304.975, -46.211] - loss: 12.705 - mae: 85.205 - mean_q: -112.452 Interval 9011 (4505000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0012 8 episodes - episode_reward: -197.078 [-247.223, -126.000] - loss: 8.187 - mae: 85.202 - mean_q: -112.441 Interval 9012 (4505500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1402 5 episodes - episode_reward: -165.759 [-182.671, -139.822] - loss: 9.783 - mae: 85.194 - mean_q: -112.428 Interval 9013 (4506000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2147 8 episodes - episode_reward: -211.636 [-303.649, -100.000] - loss: 9.831 - mae: 85.201 - mean_q: -112.427 Interval 9014 (4506500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9354 9 episodes - episode_reward: -174.146 [-213.065, -151.197] - loss: 12.261 - mae: 85.199 - mean_q: -112.398 Interval 9015 (4507000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7174 9 episodes - episode_reward: -152.885 [-241.134, -100.000] - loss: 10.444 - mae: 85.169 - mean_q: -112.372 Interval 9016 (4507500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4869 8 episodes - episode_reward: -203.467 [-244.275, -167.918] - loss: 12.161 - mae: 85.158 - mean_q: -112.344 Interval 9017 (4508000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2593 8 episodes - episode_reward: -213.281 [-279.110, -183.135] - loss: 11.731 - mae: 85.145 - mean_q: -112.328 Interval 9018 (4508500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.8862 7 episodes - episode_reward: -208.695 [-247.254, -168.798] - loss: 10.523 - mae: 85.134 - mean_q: -112.312 Interval 9019 (4509000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3780 6 episodes - episode_reward: -197.532 [-301.617, -155.929] - loss: 9.310 - mae: 85.117 - mean_q: -112.316 Interval 9020 (4509500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0867 7 episodes - episode_reward: -219.694 [-391.414, -124.194] - loss: 10.155 - mae: 85.112 - mean_q: -112.298 Interval 9021 (4510000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8053 9 episodes - episode_reward: -154.440 [-203.439, -116.097] - loss: 8.356 - mae: 85.106 - mean_q: -112.293 Interval 9022 (4510500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7953 9 episodes - episode_reward: -160.423 [-260.554, -76.525] - loss: 11.076 - mae: 85.112 - mean_q: -112.331 Interval 9023 (4511000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7685 8 episodes - episode_reward: -172.851 [-213.252, -145.695] - loss: 10.821 - mae: 85.104 - mean_q: -112.328 Interval 9024 (4511500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7188 7 episodes - episode_reward: -177.423 [-262.762, -132.564] - loss: 8.949 - mae: 85.093 - mean_q: -112.325 Interval 9025 (4512000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1149 8 episodes - episode_reward: -209.265 [-376.924, -151.122] - loss: 9.110 - mae: 85.091 - mean_q: -112.313 Interval 9026 (4512500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9927 7 episodes - episode_reward: -205.146 [-319.078, -145.875] - loss: 11.606 - mae: 85.103 - mean_q: -112.308 Interval 9027 (4513000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8408 8 episodes - episode_reward: -184.909 [-236.343, -119.525] - loss: 12.121 - mae: 85.084 - mean_q: -112.270 Interval 9028 (4513500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8670 6 episodes - episode_reward: -149.317 [-168.176, -125.766] - loss: 8.846 - mae: 85.057 - mean_q: -112.264 Interval 9029 (4514000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9017 8 episodes - episode_reward: -120.599 [-190.044, 22.564] - loss: 7.531 - mae: 85.052 - mean_q: -112.279 Interval 9030 (4514500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4909 8 episodes - episode_reward: -147.618 [-229.343, 15.840] - loss: 12.819 - mae: 85.077 - mean_q: -112.275 Interval 9031 (4515000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8576 9 episodes - episode_reward: -164.260 [-224.940, -64.676] - loss: 10.490 - mae: 85.067 - mean_q: -112.214 Interval 9032 (4515500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0373 8 episodes - episode_reward: -183.193 [-290.578, -100.000] - loss: 10.255 - mae: 85.072 - mean_q: -112.198 Interval 9033 (4516000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7612 9 episodes - episode_reward: -161.299 [-194.522, -140.609] - loss: 10.293 - mae: 85.060 - mean_q: -112.173 Interval 9034 (4516500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2757 7 episodes - episode_reward: -164.893 [-265.202, -81.115] - loss: 10.432 - mae: 85.024 - mean_q: -112.160 Interval 9035 (4517000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6210 7 episodes - episode_reward: -173.670 [-231.297, -78.858] - loss: 12.387 - mae: 85.017 - mean_q: -112.130 Interval 9036 (4517500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9497 8 episodes - episode_reward: -194.386 [-269.142, -98.204] - loss: 9.614 - mae: 85.000 - mean_q: -112.075 Interval 9037 (4518000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1630 8 episodes - episode_reward: -186.098 [-244.160, -100.000] - loss: 9.760 - mae: 84.982 - mean_q: -112.051 Interval 9038 (4518500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0385 8 episodes - episode_reward: -201.792 [-291.044, -148.501] - loss: 11.973 - mae: 84.960 - mean_q: -112.021 Interval 9039 (4519000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9875 8 episodes - episode_reward: -188.198 [-302.901, -98.520] - loss: 11.975 - mae: 84.941 - mean_q: -111.978 Interval 9040 (4519500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3446 8 episodes - episode_reward: -144.013 [-221.930, -66.984] - loss: 9.761 - mae: 84.908 - mean_q: -111.932 Interval 9041 (4520000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9460 9 episodes - episode_reward: -168.664 [-247.079, 31.863] - loss: 11.695 - mae: 84.878 - mean_q: -111.905 Interval 9042 (4520500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9293 7 episodes - episode_reward: -205.852 [-265.151, -91.775] - loss: 14.847 - mae: 84.863 - mean_q: -111.884 Interval 9043 (4521000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6331 7 episodes - episode_reward: -189.628 [-244.320, -111.542] - loss: 12.436 - mae: 84.830 - mean_q: -111.865 Interval 9044 (4521500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9035 8 episodes - episode_reward: -180.650 [-241.323, -102.896] - loss: 12.798 - mae: 84.810 - mean_q: -111.835 Interval 9045 (4522000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7014 7 episodes - episode_reward: -176.424 [-221.006, -114.931] - loss: 11.159 - mae: 84.775 - mean_q: -111.810 Interval 9046 (4522500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6327 7 episodes - episode_reward: -193.116 [-279.612, -146.963] - loss: 10.805 - mae: 84.742 - mean_q: -111.793 Interval 9047 (4523000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7620 7 episodes - episode_reward: -204.968 [-278.946, -176.213] - loss: 11.743 - mae: 84.725 - mean_q: -111.764 Interval 9048 (4523500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5766 7 episodes - episode_reward: -176.763 [-264.721, -131.359] - loss: 12.153 - mae: 84.721 - mean_q: -111.736 Interval 9049 (4524000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6891 8 episodes - episode_reward: -234.647 [-408.170, -100.000] - loss: 9.446 - mae: 84.691 - mean_q: -111.712 Interval 9050 (4524500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5423 8 episodes - episode_reward: -164.634 [-228.675, 13.234] - loss: 11.551 - mae: 84.671 - mean_q: -111.705 Interval 9051 (4525000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4505 8 episodes - episode_reward: -157.626 [-228.798, -100.000] - loss: 13.542 - mae: 84.662 - mean_q: -111.660 Interval 9052 (4525500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8429 8 episodes - episode_reward: -173.211 [-248.833, -109.940] - loss: 11.786 - mae: 84.650 - mean_q: -111.628 Interval 9053 (4526000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1785 9 episodes - episode_reward: -166.492 [-198.453, -107.459] - loss: 9.034 - mae: 84.614 - mean_q: -111.622 Interval 9054 (4526500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0189 8 episodes - episode_reward: -196.873 [-257.524, -139.663] - loss: 14.293 - mae: 84.613 - mean_q: -111.584 Interval 9055 (4527000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8527 8 episodes - episode_reward: -183.032 [-252.001, -123.595] - loss: 12.337 - mae: 84.581 - mean_q: -111.555 Interval 9056 (4527500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1144 9 episodes - episode_reward: -168.112 [-276.588, -100.000] - loss: 9.219 - mae: 84.556 - mean_q: -111.545 Interval 9057 (4528000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3906 8 episodes - episode_reward: -208.038 [-370.251, -123.867] - loss: 12.621 - mae: 84.531 - mean_q: -111.534 Interval 9058 (4528500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5952 9 episodes - episode_reward: -203.434 [-252.765, -103.429] - loss: 8.863 - mae: 84.484 - mean_q: -111.527 Interval 9059 (4529000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4516 9 episodes - episode_reward: -187.324 [-223.206, -155.698] - loss: 11.486 - mae: 84.449 - mean_q: -111.514 Interval 9060 (4529500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7274 7 episodes - episode_reward: -261.990 [-391.566, -157.397] - loss: 11.612 - mae: 84.404 - mean_q: -111.499 Interval 9061 (4530000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3637 9 episodes - episode_reward: -190.318 [-234.004, -111.307] - loss: 11.276 - mae: 84.397 - mean_q: -111.478 Interval 9062 (4530500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9139 7 episodes - episode_reward: -150.827 [-245.497, 1.481] - loss: 9.344 - mae: 84.366 - mean_q: -111.468 Interval 9063 (4531000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -1.4620 6 episodes - episode_reward: -106.700 [-218.503, 28.253] - loss: 12.398 - mae: 84.351 - mean_q: -111.475 Interval 9064 (4531500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3989 7 episodes - episode_reward: -167.566 [-224.322, 0.421] - loss: 12.164 - mae: 84.321 - mean_q: -111.453 Interval 9065 (4532000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0841 7 episodes - episode_reward: -158.709 [-208.303, -46.239] - loss: 9.172 - mae: 84.293 - mean_q: -111.448 Interval 9066 (4532500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6080 8 episodes - episode_reward: -159.479 [-237.391, -102.002] - loss: 9.858 - mae: 84.280 - mean_q: -111.425 Interval 9067 (4533000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6461 7 episodes - episode_reward: -190.954 [-307.156, -131.525] - loss: 15.124 - mae: 84.300 - mean_q: -111.412 Interval 9068 (4533500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2692 8 episodes - episode_reward: -201.384 [-279.972, -162.147] - loss: 10.495 - mae: 84.255 - mean_q: -111.370 Interval 9069 (4534000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8699 8 episodes - episode_reward: -176.530 [-292.008, -100.000] - loss: 10.632 - mae: 84.228 - mean_q: -111.357 Interval 9070 (4534500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7405 9 episodes - episode_reward: -157.321 [-233.939, -100.000] - loss: 11.376 - mae: 84.200 - mean_q: -111.329 Interval 9071 (4535000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7924 8 episodes - episode_reward: -170.649 [-238.468, -128.321] - loss: 8.418 - mae: 84.152 - mean_q: -111.347 Interval 9072 (4535500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4113 8 episodes - episode_reward: -213.326 [-286.575, -141.674] - loss: 7.141 - mae: 84.129 - mean_q: -111.386 Interval 9073 (4536000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0483 8 episodes - episode_reward: -201.389 [-290.519, -147.324] - loss: 12.236 - mae: 84.145 - mean_q: -111.384 Interval 9074 (4536500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4865 7 episodes - episode_reward: -165.270 [-243.369, -116.004] - loss: 11.894 - mae: 84.136 - mean_q: -111.383 Interval 9075 (4537000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5088 8 episodes - episode_reward: -159.822 [-270.384, 28.597] - loss: 10.627 - mae: 84.119 - mean_q: -111.373 Interval 9076 (4537500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7053 10 episodes - episode_reward: -177.716 [-461.638, -103.973] - loss: 11.411 - mae: 84.135 - mean_q: -111.348 Interval 9077 (4538000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3218 10 episodes - episode_reward: -178.414 [-256.250, -100.000] - loss: 10.492 - mae: 84.112 - mean_q: -111.350 Interval 9078 (4538500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9437 8 episodes - episode_reward: -165.826 [-225.124, -120.117] - loss: 9.442 - mae: 84.100 - mean_q: -111.352 Interval 9079 (4539000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0996 10 episodes - episode_reward: -168.972 [-290.467, -91.133] - loss: 8.078 - mae: 84.082 - mean_q: -111.348 Interval 9080 (4539500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5585 7 episodes - episode_reward: -178.558 [-214.497, -153.564] - loss: 11.176 - mae: 84.091 - mean_q: -111.366 Interval 9081 (4540000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4349 7 episodes - episode_reward: -179.393 [-229.353, -100.033] - loss: 9.032 - mae: 84.068 - mean_q: -111.368 Interval 9082 (4540500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9788 9 episodes - episode_reward: -163.120 [-257.663, -100.000] - loss: 12.049 - mae: 84.064 - mean_q: -111.359 Interval 9083 (4541000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6234 7 episodes - episode_reward: -187.447 [-256.321, -138.522] - loss: 10.271 - mae: 84.048 - mean_q: -111.363 Interval 9084 (4541500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7380 8 episodes - episode_reward: -161.074 [-219.772, -122.043] - loss: 11.049 - mae: 84.040 - mean_q: -111.348 Interval 9085 (4542000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4722 8 episodes - episode_reward: -165.464 [-268.962, -25.186] - loss: 8.553 - mae: 84.026 - mean_q: -111.374 Interval 9086 (4542500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0795 7 episodes - episode_reward: -279.765 [-568.513, -135.135] - loss: 10.893 - mae: 84.054 - mean_q: -111.398 Interval 9087 (4543000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7783 6 episodes - episode_reward: -151.015 [-198.623, -46.670] - loss: 8.409 - mae: 84.055 - mean_q: -111.402 Interval 9088 (4543500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4269 7 episodes - episode_reward: -176.744 [-363.194, -6.525] - loss: 14.265 - mae: 84.096 - mean_q: -111.391 Interval 9089 (4544000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5054 8 episodes - episode_reward: -160.466 [-219.169, -113.773] - loss: 9.292 - mae: 84.070 - mean_q: -111.373 Interval 9090 (4544500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8556 8 episodes - episode_reward: -167.184 [-278.570, -77.294] - loss: 11.916 - mae: 84.104 - mean_q: -111.372 Interval 9091 (4545000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4795 8 episodes - episode_reward: -229.416 [-295.114, -124.177] - loss: 10.931 - mae: 84.122 - mean_q: -111.367 Interval 9092 (4545500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9101 8 episodes - episode_reward: -177.751 [-230.454, -100.000] - loss: 11.878 - mae: 84.130 - mean_q: -111.351 Interval 9093 (4546000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8943 8 episodes - episode_reward: -183.032 [-253.135, -138.574] - loss: 11.657 - mae: 84.126 - mean_q: -111.334 Interval 9094 (4546500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8021 8 episodes - episode_reward: -170.685 [-291.055, -100.000] - loss: 10.939 - mae: 84.120 - mean_q: -111.356 Interval 9095 (4547000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2281 7 episodes - episode_reward: -178.226 [-206.716, -156.923] - loss: 11.749 - mae: 84.135 - mean_q: -111.342 Interval 9096 (4547500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2872 7 episodes - episode_reward: -147.659 [-242.789, -7.727] - loss: 9.538 - mae: 84.117 - mean_q: -111.374 Interval 9097 (4548000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4327 9 episodes - episode_reward: -192.138 [-332.224, -127.074] - loss: 11.391 - mae: 84.132 - mean_q: -111.383 Interval 9098 (4548500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4346 9 episodes - episode_reward: -192.975 [-261.103, -100.000] - loss: 9.788 - mae: 84.113 - mean_q: -111.396 Interval 9099 (4549000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5718 7 episodes - episode_reward: -178.257 [-216.331, -159.698] - loss: 8.924 - mae: 84.073 - mean_q: -111.415 Interval 9100 (4549500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6434 8 episodes - episode_reward: -172.362 [-219.411, -131.643] - loss: 11.279 - mae: 84.074 - mean_q: -111.429 Interval 9101 (4550000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0859 7 episodes - episode_reward: -210.796 [-544.298, -115.834] - loss: 12.145 - mae: 84.036 - mean_q: -111.411 Interval 9102 (4550500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2901 5 episodes - episode_reward: -544.089 [-1061.485, -249.650] - loss: 9.325 - mae: 84.016 - mean_q: -111.347 Interval 9103 (4551000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -5.0989 2 episodes - episode_reward: -1246.905 [-1397.745, -1096.065] - loss: 13.733 - mae: 84.046 - mean_q: -111.321 Interval 9104 (4551500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6770 8 episodes - episode_reward: -161.516 [-191.840, -126.688] - loss: 9.931 - mae: 84.055 - mean_q: -111.335 Interval 9105 (4552000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8110 6 episodes - episode_reward: -160.450 [-229.158, -61.253] - loss: 9.899 - mae: 84.069 - mean_q: -111.344 Interval 9106 (4552500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0215 8 episodes - episode_reward: -178.502 [-213.812, -138.062] - loss: 8.394 - mae: 84.092 - mean_q: -111.347 Interval 9107 (4553000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7033 8 episodes - episode_reward: -181.543 [-219.826, -151.528] - loss: 12.228 - mae: 84.120 - mean_q: -111.344 Interval 9108 (4553500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.1695 6 episodes - episode_reward: -175.407 [-278.875, -146.230] - loss: 8.131 - mae: 84.119 - mean_q: -111.330 Interval 9109 (4554000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6815 11 episodes - episode_reward: -171.469 [-258.344, -100.000] - loss: 11.695 - mae: 84.146 - mean_q: -111.341 Interval 9110 (4554500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7648 7 episodes - episode_reward: -184.076 [-245.955, -132.238] - loss: 10.779 - mae: 84.167 - mean_q: -111.340 Interval 9111 (4555000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6484 8 episodes - episode_reward: -158.158 [-216.864, -115.453] - loss: 7.979 - mae: 84.179 - mean_q: -111.356 Interval 9112 (4555500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0648 9 episodes - episode_reward: -185.382 [-305.205, -117.780] - loss: 13.237 - mae: 84.207 - mean_q: -111.372 Interval 9113 (4556000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5437 7 episodes - episode_reward: -171.615 [-364.560, -63.152] - loss: 10.726 - mae: 84.200 - mean_q: -111.359 Interval 9114 (4556500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7497 8 episodes - episode_reward: -179.564 [-243.482, -100.000] - loss: 9.823 - mae: 84.196 - mean_q: -111.327 Interval 9115 (4557000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8656 8 episodes - episode_reward: -179.407 [-226.492, -100.000] - loss: 7.702 - mae: 84.187 - mean_q: -111.331 Interval 9116 (4557500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9751 8 episodes - episode_reward: -178.929 [-306.052, -90.539] - loss: 13.429 - mae: 84.203 - mean_q: -111.342 Interval 9117 (4558000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2161 8 episodes - episode_reward: -209.721 [-307.163, -119.087] - loss: 12.451 - mae: 84.198 - mean_q: -111.328 Interval 9118 (4558500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1466 8 episodes - episode_reward: -195.509 [-284.160, -138.232] - loss: 12.553 - mae: 84.204 - mean_q: -111.302 Interval 9119 (4559000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9782 7 episodes - episode_reward: -209.137 [-306.984, -116.111] - loss: 10.310 - mae: 84.187 - mean_q: -111.281 Interval 9120 (4559500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3230 7 episodes - episode_reward: -166.591 [-195.978, -117.474] - loss: 12.117 - mae: 84.205 - mean_q: -111.272 Interval 9121 (4560000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1218 8 episodes - episode_reward: -181.499 [-309.700, -65.755] - loss: 8.337 - mae: 84.179 - mean_q: -111.247 Interval 9122 (4560500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8087 8 episodes - episode_reward: -177.439 [-462.307, 7.765] - loss: 9.745 - mae: 84.172 - mean_q: -111.269 Interval 9123 (4561000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4581 9 episodes - episode_reward: -197.162 [-327.394, -123.468] - loss: 16.816 - mae: 84.209 - mean_q: -111.252 Interval 9124 (4561500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8810 8 episodes - episode_reward: -181.095 [-217.511, -141.680] - loss: 7.926 - mae: 84.153 - mean_q: -111.236 Interval 9125 (4562000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2977 9 episodes - episode_reward: -191.603 [-304.046, -100.000] - loss: 9.902 - mae: 84.167 - mean_q: -111.242 Interval 9126 (4562500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -3.6025 10 episodes - episode_reward: -169.766 [-279.599, -100.000] - loss: 11.337 - mae: 84.181 - mean_q: -111.249 Interval 9127 (4563000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7485 7 episodes - episode_reward: -200.463 [-319.686, -74.474] - loss: 12.952 - mae: 84.159 - mean_q: -111.248 Interval 9128 (4563500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9172 7 episodes - episode_reward: -210.728 [-368.550, -126.233] - loss: 9.482 - mae: 84.145 - mean_q: -111.254 Interval 9129 (4564000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8950 8 episodes - episode_reward: -161.401 [-254.812, -100.000] - loss: 12.172 - mae: 84.165 - mean_q: -111.265 Interval 9130 (4564500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4086 9 episodes - episode_reward: -206.248 [-305.014, -121.396] - loss: 10.828 - mae: 84.177 - mean_q: -111.263 Interval 9131 (4565000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4688 8 episodes - episode_reward: -160.810 [-241.545, -67.082] - loss: 10.873 - mae: 84.182 - mean_q: -111.270 Interval 9132 (4565500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6326 8 episodes - episode_reward: -163.047 [-257.434, -96.546] - loss: 10.513 - mae: 84.168 - mean_q: -111.269 Interval 9133 (4566000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4855 6 episodes - episode_reward: -202.551 [-311.636, -88.153] - loss: 11.941 - mae: 84.185 - mean_q: -111.284 Interval 9134 (4566500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0399 8 episodes - episode_reward: -188.796 [-256.555, -100.000] - loss: 8.297 - mae: 84.153 - mean_q: -111.301 Interval 9135 (4567000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6161 7 episodes - episode_reward: -180.750 [-265.616, -145.508] - loss: 10.966 - mae: 84.193 - mean_q: -111.312 Interval 9136 (4567500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4062 9 episodes - episode_reward: -191.653 [-226.333, -118.543] - loss: 12.539 - mae: 84.198 - mean_q: -111.303 Interval 9137 (4568000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5478 8 episodes - episode_reward: -164.038 [-263.039, -12.266] - loss: 9.136 - mae: 84.179 - mean_q: -111.313 Interval 9138 (4568500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9818 9 episodes - episode_reward: -162.249 [-230.589, -95.468] - loss: 13.430 - mae: 84.195 - mean_q: -111.330 Interval 9139 (4569000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3294 9 episodes - episode_reward: -189.557 [-235.425, -100.000] - loss: 10.927 - mae: 84.184 - mean_q: -111.354 Interval 9140 (4569500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0963 6 episodes - episode_reward: -171.039 [-218.363, -136.942] - loss: 11.215 - mae: 84.205 - mean_q: -111.366 Interval 9141 (4570000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1161 8 episodes - episode_reward: -194.553 [-265.364, -118.680] - loss: 11.753 - mae: 84.229 - mean_q: -111.357 Interval 9142 (4570500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8695 8 episodes - episode_reward: -175.345 [-241.654, -131.194] - loss: 11.515 - mae: 84.221 - mean_q: -111.362 Interval 9143 (4571000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9459 8 episodes - episode_reward: -190.436 [-294.767, -94.999] - loss: 7.744 - mae: 84.220 - mean_q: -111.376 Interval 9144 (4571500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7628 8 episodes - episode_reward: -175.207 [-276.040, -0.277] - loss: 10.586 - mae: 84.222 - mean_q: -111.384 Interval 9145 (4572000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8308 6 episodes - episode_reward: -223.015 [-335.304, -131.356] - loss: 12.854 - mae: 84.250 - mean_q: -111.380 Interval 9146 (4572500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0626 9 episodes - episode_reward: -166.777 [-243.533, -100.000] - loss: 9.735 - mae: 84.253 - mean_q: -111.405 Interval 9147 (4573000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3962 10 episodes - episode_reward: -180.834 [-280.497, -106.086] - loss: 10.207 - mae: 84.270 - mean_q: -111.414 Interval 9148 (4573500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4097 10 episodes - episode_reward: -171.056 [-230.598, -100.000] - loss: 9.768 - mae: 84.265 - mean_q: -111.415 Interval 9149 (4574000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6185 6 episodes - episode_reward: -194.816 [-248.485, -149.201] - loss: 8.528 - mae: 84.254 - mean_q: -111.426 Interval 9150 (4574500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4764 9 episodes - episode_reward: -210.141 [-287.766, -113.023] - loss: 10.904 - mae: 84.244 - mean_q: -111.450 Interval 9151 (4575000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2550 8 episodes - episode_reward: -201.433 [-286.151, -138.176] - loss: 11.114 - mae: 84.250 - mean_q: -111.477 Interval 9152 (4575500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3188 8 episodes - episode_reward: -196.760 [-258.384, -113.756] - loss: 10.945 - mae: 84.277 - mean_q: -111.504 Interval 9153 (4576000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.6760 9 episodes - episode_reward: -213.981 [-379.147, -140.812] - loss: 8.273 - mae: 84.274 - mean_q: -111.531 Interval 9154 (4576500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9798 9 episodes - episode_reward: -164.778 [-252.263, -90.275] - loss: 11.423 - mae: 84.311 - mean_q: -111.585 Interval 9155 (4577000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7338 7 episodes - episode_reward: -188.982 [-225.315, -122.326] - loss: 11.348 - mae: 84.339 - mean_q: -111.610 Interval 9156 (4577500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9578 8 episodes - episode_reward: -185.362 [-225.801, -128.357] - loss: 7.649 - mae: 84.319 - mean_q: -111.642 Interval 9157 (4578000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2593 7 episodes - episode_reward: -304.252 [-565.069, -153.008] - loss: 12.493 - mae: 84.374 - mean_q: -111.655 Interval 9158 (4578500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3588 7 episodes - episode_reward: -246.032 [-549.923, -102.583] - loss: 10.957 - mae: 84.374 - mean_q: -111.659 Interval 9159 (4579000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6840 8 episodes - episode_reward: -232.238 [-529.120, -133.206] - loss: 9.448 - mae: 84.381 - mean_q: -111.688 Interval 9160 (4579500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7237 5 episodes - episode_reward: -150.584 [-194.030, -79.617] - loss: 10.857 - mae: 84.407 - mean_q: -111.705 Interval 9161 (4580000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1780 8 episodes - episode_reward: -208.943 [-320.949, -145.118] - loss: 10.328 - mae: 84.441 - mean_q: -111.727 Interval 9162 (4580500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8672 8 episodes - episode_reward: -181.525 [-327.941, -104.127] - loss: 10.597 - mae: 84.469 - mean_q: -111.773 Interval 9163 (4581000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2782 9 episodes - episode_reward: -181.307 [-268.507, -100.000] - loss: 10.423 - mae: 84.499 - mean_q: -111.796 Interval 9164 (4581500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9139 7 episodes - episode_reward: -207.775 [-293.276, -124.925] - loss: 9.945 - mae: 84.508 - mean_q: -111.847 Interval 9165 (4582000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2191 8 episodes - episode_reward: -192.582 [-246.400, -113.950] - loss: 8.345 - mae: 84.527 - mean_q: -111.894 Interval 9166 (4582500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6934 9 episodes - episode_reward: -156.721 [-261.077, 1.787] - loss: 12.536 - mae: 84.568 - mean_q: -111.944 Interval 9167 (4583000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9470 8 episodes - episode_reward: -184.376 [-237.920, -139.658] - loss: 13.952 - mae: 84.603 - mean_q: -111.929 Interval 9168 (4583500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9217 8 episodes - episode_reward: -181.584 [-236.542, -100.000] - loss: 11.734 - mae: 84.611 - mean_q: -111.938 Interval 9169 (4584000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7252 8 episodes - episode_reward: -169.991 [-272.889, -140.917] - loss: 12.817 - mae: 84.620 - mean_q: -111.947 Interval 9170 (4584500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8143 7 episodes - episode_reward: -203.767 [-381.747, -145.036] - loss: 8.501 - mae: 84.627 - mean_q: -111.974 Interval 9171 (4585000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.0818 7 episodes - episode_reward: -130.321 [-228.504, 27.932] - loss: 11.821 - mae: 84.645 - mean_q: -111.985 Interval 9172 (4585500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4099 8 episodes - episode_reward: -164.185 [-248.146, -127.381] - loss: 10.444 - mae: 84.672 - mean_q: -112.010 Interval 9173 (4586000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5985 7 episodes - episode_reward: -175.163 [-255.092, -119.833] - loss: 10.188 - mae: 84.684 - mean_q: -112.019 Interval 9174 (4586500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9244 9 episodes - episode_reward: -171.450 [-280.963, -129.164] - loss: 11.012 - mae: 84.720 - mean_q: -112.043 Interval 9175 (4587000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4149 9 episodes - episode_reward: -178.006 [-226.689, -118.115] - loss: 12.081 - mae: 84.740 - mean_q: -112.046 Interval 9176 (4587500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0391 8 episodes - episode_reward: -205.719 [-303.069, -127.054] - loss: 10.644 - mae: 84.752 - mean_q: -112.072 Interval 9177 (4588000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6994 6 episodes - episode_reward: -204.311 [-271.024, -151.416] - loss: 9.973 - mae: 84.767 - mean_q: -112.061 Interval 9178 (4588500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9110 7 episodes - episode_reward: -205.628 [-256.831, -153.088] - loss: 9.472 - mae: 84.791 - mean_q: -112.094 Interval 9179 (4589000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6706 7 episodes - episode_reward: -201.389 [-246.502, -144.335] - loss: 7.739 - mae: 84.813 - mean_q: -112.115 Interval 9180 (4589500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0409 8 episodes - episode_reward: -183.575 [-221.830, -100.000] - loss: 14.421 - mae: 84.851 - mean_q: -112.130 Interval 9181 (4590000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6676 7 episodes - episode_reward: -197.879 [-250.893, -161.482] - loss: 7.070 - mae: 84.828 - mean_q: -112.155 Interval 9182 (4590500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3550 10 episodes - episode_reward: -170.221 [-232.380, -114.976] - loss: 9.122 - mae: 84.850 - mean_q: -112.187 Interval 9183 (4591000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5768 7 episodes - episode_reward: -188.663 [-271.526, -25.505] - loss: 11.130 - mae: 84.871 - mean_q: -112.225 Interval 9184 (4591500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2937 8 episodes - episode_reward: -204.624 [-236.852, -164.036] - loss: 11.609 - mae: 84.906 - mean_q: -112.209 Interval 9185 (4592000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.1813 6 episodes - episode_reward: -163.598 [-256.746, 2.172] - loss: 9.357 - mae: 84.895 - mean_q: -112.246 Interval 9186 (4592500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5726 8 episodes - episode_reward: -174.950 [-252.494, -30.909] - loss: 9.272 - mae: 84.907 - mean_q: -112.288 Interval 9187 (4593000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0395 7 episodes - episode_reward: -139.704 [-207.462, 1.621] - loss: 12.947 - mae: 84.933 - mean_q: -112.321 Interval 9188 (4593500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6637 7 episodes - episode_reward: -181.990 [-234.862, -142.077] - loss: 9.750 - mae: 84.944 - mean_q: -112.323 Interval 9189 (4594000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8969 10 episodes - episode_reward: -154.009 [-242.008, 19.424] - loss: 11.739 - mae: 84.967 - mean_q: -112.339 Interval 9190 (4594500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1732 7 episodes - episode_reward: -222.831 [-360.292, -137.761] - loss: 14.037 - mae: 84.979 - mean_q: -112.335 Interval 9191 (4595000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1066 6 episodes - episode_reward: -182.722 [-384.872, 33.785] - loss: 9.025 - mae: 84.970 - mean_q: -112.368 Interval 9192 (4595500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2100 6 episodes - episode_reward: -184.284 [-220.894, -120.744] - loss: 11.934 - mae: 84.983 - mean_q: -112.417 Interval 9193 (4596000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9976 8 episodes - episode_reward: -179.894 [-290.537, -117.844] - loss: 13.491 - mae: 84.999 - mean_q: -112.430 Interval 9194 (4596500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1969 9 episodes - episode_reward: -181.033 [-279.663, -134.331] - loss: 8.118 - mae: 84.985 - mean_q: -112.459 Interval 9195 (4597000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4854 6 episodes - episode_reward: -201.062 [-264.371, -160.661] - loss: 9.823 - mae: 84.996 - mean_q: -112.519 Interval 9196 (4597500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7476 8 episodes - episode_reward: -176.720 [-198.434, -161.583] - loss: 10.080 - mae: 85.015 - mean_q: -112.584 Interval 9197 (4598000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1574 9 episodes - episode_reward: -177.076 [-225.169, -123.346] - loss: 11.753 - mae: 85.033 - mean_q: -112.608 Interval 9198 (4598500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.8536 7 episodes - episode_reward: -198.852 [-354.668, -100.000] - loss: 13.587 - mae: 85.066 - mean_q: -112.627 Interval 9199 (4599000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6462 7 episodes - episode_reward: -182.534 [-306.027, -142.425] - loss: 12.980 - mae: 85.085 - mean_q: -112.628 Interval 9200 (4599500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0898 9 episodes - episode_reward: -236.982 [-493.528, -125.884] - loss: 9.725 - mae: 85.078 - mean_q: -112.642 Interval 9201 (4600000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.9722 7 episodes - episode_reward: -421.420 [-959.219, -156.178] - loss: 11.265 - mae: 85.089 - mean_q: -112.659 Interval 9202 (4600500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8215 6 episodes - episode_reward: -210.537 [-334.312, -154.878] - loss: 11.338 - mae: 85.100 - mean_q: -112.692 Interval 9203 (4601000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4459 8 episodes - episode_reward: -176.735 [-348.273, -3.373] - loss: 12.766 - mae: 85.084 - mean_q: -112.704 Interval 9204 (4601500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3644 7 episodes - episode_reward: -171.563 [-232.311, -135.165] - loss: 14.102 - mae: 85.076 - mean_q: -112.728 Interval 9205 (4602000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5670 7 episodes - episode_reward: -175.413 [-217.936, -142.136] - loss: 11.820 - mae: 85.051 - mean_q: -112.725 Interval 9206 (4602500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6672 5 episodes - episode_reward: -448.599 [-617.729, -331.221] - loss: 13.096 - mae: 85.061 - mean_q: -112.692 Interval 9207 (4603000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -5.4608 3 episodes - episode_reward: -757.481 [-809.982, -715.186] - loss: 10.058 - mae: 85.064 - mean_q: -112.669 Interval 9208 (4603500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7860 7 episodes - episode_reward: -351.222 [-1011.650, -176.704] - loss: 14.923 - mae: 85.136 - mean_q: -112.731 Interval 9209 (4604000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9050 9 episodes - episode_reward: -160.821 [-291.028, 53.868] - loss: 14.540 - mae: 85.192 - mean_q: -112.750 Interval 9210 (4604500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5379 8 episodes - episode_reward: -161.481 [-236.002, 4.595] - loss: 9.566 - mae: 85.224 - mean_q: -112.774 Interval 9211 (4605000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9875 7 episodes - episode_reward: -207.828 [-290.649, -144.231] - loss: 10.603 - mae: 85.266 - mean_q: -112.808 Interval 9212 (4605500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6497 9 episodes - episode_reward: -145.864 [-240.759, -44.569] - loss: 8.435 - mae: 85.302 - mean_q: -112.851 Interval 9213 (4606000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8083 7 episodes - episode_reward: -202.116 [-240.037, -140.082] - loss: 16.396 - mae: 85.369 - mean_q: -112.884 Interval 9214 (4606500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3519 8 episodes - episode_reward: -213.532 [-281.751, -160.667] - loss: 11.341 - mae: 85.382 - mean_q: -112.865 Interval 9215 (4607000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7994 8 episodes - episode_reward: -175.315 [-217.995, -100.000] - loss: 11.416 - mae: 85.423 - mean_q: -112.877 Interval 9216 (4607500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9523 8 episodes - episode_reward: -177.381 [-310.617, -100.000] - loss: 13.739 - mae: 85.455 - mean_q: -112.891 Interval 9217 (4608000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7454 7 episodes - episode_reward: -204.321 [-269.258, -168.904] - loss: 14.633 - mae: 85.485 - mean_q: -112.888 Interval 9218 (4608500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2590 7 episodes - episode_reward: -226.681 [-335.206, -176.866] - loss: 10.589 - mae: 85.489 - mean_q: -112.910 Interval 9219 (4609000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5873 7 episodes - episode_reward: -180.480 [-253.194, -138.904] - loss: 11.214 - mae: 85.516 - mean_q: -112.960 Interval 9220 (4609500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7537 8 episodes - episode_reward: -184.332 [-226.031, -138.904] - loss: 12.266 - mae: 85.545 - mean_q: -112.994 Interval 9221 (4610000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2207 8 episodes - episode_reward: -196.370 [-249.159, -130.423] - loss: 12.211 - mae: 85.576 - mean_q: -113.029 Interval 9222 (4610500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7679 8 episodes - episode_reward: -168.114 [-242.656, -100.000] - loss: 11.633 - mae: 85.596 - mean_q: -113.042 Interval 9223 (4611000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5164 8 episodes - episode_reward: -160.095 [-224.125, -29.978] - loss: 11.919 - mae: 85.611 - mean_q: -113.059 Interval 9224 (4611500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1598 9 episodes - episode_reward: -173.131 [-234.450, -100.000] - loss: 12.046 - mae: 85.644 - mean_q: -113.120 Interval 9225 (4612000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.8771 9 episodes - episode_reward: -164.262 [-214.178, -121.058] - loss: 12.718 - mae: 85.660 - mean_q: -113.140 Interval 9226 (4612500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5727 10 episodes - episode_reward: -174.867 [-240.694, -100.000] - loss: 11.542 - mae: 85.662 - mean_q: -113.145 Interval 9227 (4613000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2219 10 episodes - episode_reward: -165.837 [-225.453, -100.000] - loss: 10.966 - mae: 85.691 - mean_q: -113.137 Interval 9228 (4613500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0108 8 episodes - episode_reward: -186.611 [-271.559, -140.631] - loss: 8.423 - mae: 85.681 - mean_q: -113.167 Interval 9229 (4614000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8385 9 episodes - episode_reward: -148.541 [-211.563, -62.485] - loss: 12.314 - mae: 85.736 - mean_q: -113.184 Interval 9230 (4614500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7094 8 episodes - episode_reward: -169.812 [-225.631, -100.000] - loss: 10.468 - mae: 85.750 - mean_q: -113.199 Interval 9231 (4615000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4963 7 episodes - episode_reward: -188.781 [-262.738, -130.612] - loss: 10.049 - mae: 85.759 - mean_q: -113.219 Interval 9232 (4615500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6152 9 episodes - episode_reward: -203.134 [-364.322, -100.000] - loss: 9.978 - mae: 85.764 - mean_q: -113.232 Interval 9233 (4616000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0233 11 episodes - episode_reward: -228.123 [-880.966, -96.972] - loss: 10.717 - mae: 85.782 - mean_q: -113.273 Interval 9234 (4616500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -4.7864 7 episodes - episode_reward: -341.460 [-874.496, -171.702] - loss: 12.409 - mae: 85.825 - mean_q: -113.291 Interval 9235 (4617000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3381 7 episodes - episode_reward: -163.375 [-253.680, -10.947] - loss: 11.531 - mae: 85.841 - mean_q: -113.326 Interval 9236 (4617500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6349 8 episodes - episode_reward: -173.836 [-248.742, -139.313] - loss: 13.610 - mae: 85.877 - mean_q: -113.356 Interval 9237 (4618000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9109 7 episodes - episode_reward: -198.746 [-259.263, -152.022] - loss: 10.895 - mae: 85.872 - mean_q: -113.359 Interval 9238 (4618500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2389 8 episodes - episode_reward: -192.023 [-251.435, -148.526] - loss: 11.136 - mae: 85.901 - mean_q: -113.400 Interval 9239 (4619000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9439 6 episodes - episode_reward: -169.087 [-205.608, -53.559] - loss: 14.705 - mae: 85.941 - mean_q: -113.413 Interval 9240 (4619500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9022 8 episodes - episode_reward: -187.811 [-250.188, -136.492] - loss: 10.919 - mae: 85.941 - mean_q: -113.405 Interval 9241 (4620000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5190 7 episodes - episode_reward: -174.671 [-242.610, -113.154] - loss: 11.233 - mae: 85.960 - mean_q: -113.408 Interval 9242 (4620500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3891 7 episodes - episode_reward: -177.033 [-231.481, -143.955] - loss: 10.804 - mae: 85.988 - mean_q: -113.451 Interval 9243 (4621000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0823 8 episodes - episode_reward: -194.784 [-281.598, -100.000] - loss: 12.014 - mae: 86.010 - mean_q: -113.464 Interval 9244 (4621500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7901 7 episodes - episode_reward: -182.587 [-267.844, -100.000] - loss: 13.300 - mae: 86.034 - mean_q: -113.503 Interval 9245 (4622000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3005 9 episodes - episode_reward: -186.420 [-227.216, -146.867] - loss: 10.602 - mae: 86.048 - mean_q: -113.520 Interval 9246 (4622500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0252 6 episodes - episode_reward: -255.609 [-333.212, -200.926] - loss: 10.894 - mae: 86.096 - mean_q: -113.513 Interval 9247 (4623000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9935 9 episodes - episode_reward: -170.343 [-257.172, 1.936] - loss: 11.242 - mae: 86.100 - mean_q: -113.530 Interval 9248 (4623500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4967 6 episodes - episode_reward: -207.249 [-324.178, -138.423] - loss: 10.223 - mae: 86.117 - mean_q: -113.556 Interval 9249 (4624000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0886 8 episodes - episode_reward: -192.236 [-266.319, -100.000] - loss: 11.547 - mae: 86.117 - mean_q: -113.557 Interval 9250 (4624500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9283 9 episodes - episode_reward: -156.334 [-227.877, -100.000] - loss: 11.874 - mae: 86.123 - mean_q: -113.588 Interval 9251 (4625000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1357 7 episodes - episode_reward: -157.805 [-205.759, -102.692] - loss: 15.630 - mae: 86.159 - mean_q: -113.580 Interval 9252 (4625500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5941 8 episodes - episode_reward: -156.549 [-210.473, -90.310] - loss: 10.874 - mae: 86.168 - mean_q: -113.572 Interval 9253 (4626000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0534 9 episodes - episode_reward: -180.033 [-234.318, -84.697] - loss: 12.556 - mae: 86.187 - mean_q: -113.558 Interval 9254 (4626500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3949 6 episodes - episode_reward: -190.703 [-277.256, -108.305] - loss: 12.052 - mae: 86.203 - mean_q: -113.567 Interval 9255 (4627000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4670 7 episodes - episode_reward: -169.201 [-237.443, -32.552] - loss: 12.307 - mae: 86.218 - mean_q: -113.569 Interval 9256 (4627500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5286 7 episodes - episode_reward: -196.033 [-305.688, -139.921] - loss: 11.870 - mae: 86.224 - mean_q: -113.576 Interval 9257 (4628000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2069 6 episodes - episode_reward: -175.106 [-264.795, -19.736] - loss: 9.895 - mae: 86.219 - mean_q: -113.584 Interval 9258 (4628500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2172 8 episodes - episode_reward: -206.009 [-296.719, -158.553] - loss: 13.322 - mae: 86.233 - mean_q: -113.591 Interval 9259 (4629000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5333 7 episodes - episode_reward: -177.482 [-229.728, -141.084] - loss: 10.641 - mae: 86.234 - mean_q: -113.599 Interval 9260 (4629500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2140 8 episodes - episode_reward: -196.950 [-331.947, -120.898] - loss: 9.943 - mae: 86.240 - mean_q: -113.620 Interval 9261 (4630000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3825 8 episodes - episode_reward: -200.533 [-274.261, -127.380] - loss: 10.599 - mae: 86.256 - mean_q: -113.613 Interval 9262 (4630500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0057 8 episodes - episode_reward: -205.279 [-405.148, -108.547] - loss: 12.359 - mae: 86.257 - mean_q: -113.601 Interval 9263 (4631000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5404 7 episodes - episode_reward: -170.979 [-236.441, -115.684] - loss: 10.390 - mae: 86.255 - mean_q: -113.615 Interval 9264 (4631500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2904 9 episodes - episode_reward: -186.430 [-235.590, -112.253] - loss: 11.526 - mae: 86.261 - mean_q: -113.605 Interval 9265 (4632000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4554 8 episodes - episode_reward: -210.567 [-287.549, -100.000] - loss: 12.587 - mae: 86.259 - mean_q: -113.563 Interval 9266 (4632500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5181 7 episodes - episode_reward: -187.623 [-261.721, -119.613] - loss: 13.712 - mae: 86.274 - mean_q: -113.560 Interval 9267 (4633000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5505 8 episodes - episode_reward: -153.967 [-234.574, -117.325] - loss: 9.943 - mae: 86.251 - mean_q: -113.519 Interval 9268 (4633500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0144 7 episodes - episode_reward: -145.894 [-234.375, 24.764] - loss: 10.550 - mae: 86.267 - mean_q: -113.533 Interval 9269 (4634000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8479 7 episodes - episode_reward: -203.141 [-229.545, -117.626] - loss: 11.273 - mae: 86.270 - mean_q: -113.527 Interval 9270 (4634500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0548 8 episodes - episode_reward: -193.771 [-258.666, -125.803] - loss: 8.956 - mae: 86.269 - mean_q: -113.526 Interval 9271 (4635000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0747 9 episodes - episode_reward: -171.907 [-226.222, -100.000] - loss: 13.804 - mae: 86.295 - mean_q: -113.504 Interval 9272 (4635500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6924 7 episodes - episode_reward: -186.754 [-235.164, -148.805] - loss: 12.578 - mae: 86.269 - mean_q: -113.504 Interval 9273 (4636000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2404 10 episodes - episode_reward: -169.331 [-222.361, -100.000] - loss: 11.624 - mae: 86.291 - mean_q: -113.507 Interval 9274 (4636500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1965 8 episodes - episode_reward: -199.090 [-339.338, -119.453] - loss: 11.783 - mae: 86.302 - mean_q: -113.496 Interval 9275 (4637000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5666 7 episodes - episode_reward: -185.020 [-306.145, -132.151] - loss: 13.512 - mae: 86.298 - mean_q: -113.487 Interval 9276 (4637500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3096 7 episodes - episode_reward: -163.083 [-248.103, 26.019] - loss: 13.669 - mae: 86.308 - mean_q: -113.477 Interval 9277 (4638000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1769 8 episodes - episode_reward: -187.455 [-253.255, -149.330] - loss: 15.196 - mae: 86.299 - mean_q: -113.449 Interval 9278 (4638500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9468 9 episodes - episode_reward: -173.081 [-257.786, -101.611] - loss: 14.597 - mae: 86.288 - mean_q: -113.435 Interval 9279 (4639000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2829 9 episodes - episode_reward: -182.181 [-266.538, -100.000] - loss: 13.350 - mae: 86.278 - mean_q: -113.417 Interval 9280 (4639500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0330 6 episodes - episode_reward: -172.398 [-214.614, -106.923] - loss: 11.885 - mae: 86.260 - mean_q: -113.408 Interval 9281 (4640000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1457 7 episodes - episode_reward: -147.299 [-222.399, -24.674] - loss: 10.363 - mae: 86.241 - mean_q: -113.410 Interval 9282 (4640500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5891 8 episodes - episode_reward: -163.401 [-383.479, -91.551] - loss: 12.648 - mae: 86.235 - mean_q: -113.432 Interval 9283 (4641000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8649 9 episodes - episode_reward: -159.875 [-225.170, -120.206] - loss: 13.563 - mae: 86.240 - mean_q: -113.440 Interval 9284 (4641500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9248 8 episodes - episode_reward: -175.726 [-248.627, -130.980] - loss: 14.440 - mae: 86.238 - mean_q: -113.413 Interval 9285 (4642000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7176 9 episodes - episode_reward: -158.956 [-240.009, -66.489] - loss: 12.782 - mae: 86.197 - mean_q: -113.402 Interval 9286 (4642500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5105 8 episodes - episode_reward: -155.727 [-196.416, -121.657] - loss: 13.703 - mae: 86.211 - mean_q: -113.397 Interval 9287 (4643000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9505 6 episodes - episode_reward: -155.058 [-272.580, 16.435] - loss: 14.527 - mae: 86.206 - mean_q: -113.371 Interval 9288 (4643500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0125 8 episodes - episode_reward: -176.198 [-251.127, -97.932] - loss: 12.910 - mae: 86.174 - mean_q: -113.325 Interval 9289 (4644000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6330 8 episodes - episode_reward: -185.525 [-259.443, -11.633] - loss: 10.054 - mae: 86.146 - mean_q: -113.306 Interval 9290 (4644500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0238 8 episodes - episode_reward: -181.622 [-223.888, -120.741] - loss: 9.688 - mae: 86.142 - mean_q: -113.298 Interval 9291 (4645000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5258 9 episodes - episode_reward: -138.297 [-263.318, 5.385] - loss: 11.882 - mae: 86.148 - mean_q: -113.280 Interval 9292 (4645500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7015 11 episodes - episode_reward: -173.526 [-233.226, -113.109] - loss: 12.686 - mae: 86.168 - mean_q: -113.260 Interval 9293 (4646000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7437 7 episodes - episode_reward: -195.915 [-256.554, -152.381] - loss: 10.655 - mae: 86.138 - mean_q: -113.225 Interval 9294 (4646500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1877 6 episodes - episode_reward: -170.584 [-204.540, -132.426] - loss: 10.502 - mae: 86.130 - mean_q: -113.220 Interval 9295 (4647000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7021 9 episodes - episode_reward: -155.762 [-223.285, -36.628] - loss: 9.977 - mae: 86.115 - mean_q: -113.211 Interval 9296 (4647500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0559 8 episodes - episode_reward: -189.270 [-268.118, -118.199] - loss: 13.290 - mae: 86.135 - mean_q: -113.196 Interval 9297 (4648000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5939 7 episodes - episode_reward: -187.520 [-282.167, -140.041] - loss: 11.813 - mae: 86.126 - mean_q: -113.162 Interval 9298 (4648500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1181 8 episodes - episode_reward: -197.043 [-252.078, -152.815] - loss: 12.115 - mae: 86.107 - mean_q: -113.137 Interval 9299 (4649000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3411 9 episodes - episode_reward: -186.571 [-239.565, -120.013] - loss: 12.036 - mae: 86.098 - mean_q: -113.109 Interval 9300 (4649500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3431 9 episodes - episode_reward: -182.598 [-276.307, -95.153] - loss: 14.547 - mae: 86.066 - mean_q: -113.113 Interval 9301 (4650000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8470 9 episodes - episode_reward: -161.308 [-215.188, -33.044] - loss: 11.420 - mae: 85.996 - mean_q: -113.098 Interval 9302 (4650500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4487 9 episodes - episode_reward: -190.387 [-284.855, -138.161] - loss: 12.640 - mae: 85.984 - mean_q: -113.093 Interval 9303 (4651000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9988 9 episodes - episode_reward: -164.319 [-232.708, -100.000] - loss: 10.789 - mae: 85.953 - mean_q: -113.082 Interval 9304 (4651500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6472 7 episodes - episode_reward: -181.588 [-296.280, -106.254] - loss: 11.133 - mae: 85.928 - mean_q: -113.091 Interval 9305 (4652000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2056 8 episodes - episode_reward: -205.265 [-381.954, -130.736] - loss: 10.712 - mae: 85.890 - mean_q: -113.087 Interval 9306 (4652500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9731 8 episodes - episode_reward: -188.314 [-271.055, -119.687] - loss: 7.356 - mae: 85.834 - mean_q: -113.088 Interval 9307 (4653000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6131 7 episodes - episode_reward: -179.471 [-236.026, -146.613] - loss: 10.817 - mae: 85.788 - mean_q: -113.086 Interval 9308 (4653500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9203 7 episodes - episode_reward: -146.534 [-193.152, -48.916] - loss: 9.384 - mae: 85.722 - mean_q: -113.065 Interval 9309 (4654000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2366 8 episodes - episode_reward: -203.241 [-301.290, -105.809] - loss: 8.915 - mae: 85.664 - mean_q: -113.062 Interval 9310 (4654500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9514 9 episodes - episode_reward: -158.407 [-209.177, -11.731] - loss: 10.461 - mae: 85.626 - mean_q: -113.079 Interval 9311 (4655000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9155 8 episodes - episode_reward: -186.628 [-261.553, -146.749] - loss: 8.450 - mae: 85.581 - mean_q: -113.100 Interval 9312 (4655500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0074 5 episodes - episode_reward: -505.915 [-767.854, -291.898] - loss: 11.831 - mae: 85.537 - mean_q: -113.066 Interval 9313 (4656000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6062 5 episodes - episode_reward: -417.711 [-670.337, -170.564] - loss: 10.533 - mae: 85.510 - mean_q: -113.020 Interval 9314 (4656500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.6572 8 episodes - episode_reward: -253.754 [-630.671, -100.000] - loss: 10.341 - mae: 85.511 - mean_q: -113.060 Interval 9315 (4657000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2078 9 episodes - episode_reward: -177.256 [-266.464, -100.000] - loss: 9.936 - mae: 85.517 - mean_q: -113.066 Interval 9316 (4657500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7160 7 episodes - episode_reward: -185.402 [-217.956, -146.353] - loss: 14.293 - mae: 85.548 - mean_q: -113.049 Interval 9317 (4658000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3424 8 episodes - episode_reward: -213.624 [-277.707, -171.494] - loss: 12.502 - mae: 85.544 - mean_q: -113.018 Interval 9318 (4658500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2810 7 episodes - episode_reward: -162.195 [-236.566, -100.000] - loss: 10.481 - mae: 85.530 - mean_q: -112.992 Interval 9319 (4659000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0445 9 episodes - episode_reward: -173.870 [-207.742, -100.000] - loss: 9.226 - mae: 85.532 - mean_q: -112.995 Interval 9320 (4659500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0585 7 episodes - episode_reward: -141.307 [-176.998, -100.000] - loss: 9.575 - mae: 85.541 - mean_q: -113.001 Interval 9321 (4660000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7665 7 episodes - episode_reward: -191.462 [-268.399, -140.548] - loss: 10.482 - mae: 85.545 - mean_q: -112.996 Interval 9322 (4660500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1908 8 episodes - episode_reward: -205.713 [-277.157, -180.551] - loss: 8.655 - mae: 85.531 - mean_q: -113.004 Interval 9323 (4661000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4390 6 episodes - episode_reward: -201.642 [-249.818, -169.179] - loss: 12.454 - mae: 85.544 - mean_q: -112.998 Interval 9324 (4661500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7134 7 episodes - episode_reward: -184.072 [-222.857, -154.432] - loss: 8.440 - mae: 85.529 - mean_q: -112.975 Interval 9325 (4662000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9790 8 episodes - episode_reward: -198.199 [-243.048, -104.653] - loss: 11.017 - mae: 85.522 - mean_q: -112.976 Interval 9326 (4662500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8006 10 episodes - episode_reward: -187.108 [-304.188, -100.000] - loss: 8.540 - mae: 85.516 - mean_q: -112.964 Interval 9327 (4663000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3758 8 episodes - episode_reward: -211.299 [-333.340, -161.118] - loss: 8.288 - mae: 85.526 - mean_q: -112.981 Interval 9328 (4663500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0082 8 episodes - episode_reward: -186.673 [-252.048, -147.791] - loss: 8.694 - mae: 85.527 - mean_q: -112.971 Interval 9329 (4664000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7713 8 episodes - episode_reward: -179.432 [-321.255, 20.968] - loss: 9.727 - mae: 85.529 - mean_q: -112.986 Interval 9330 (4664500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4822 7 episodes - episode_reward: -167.205 [-339.291, -44.964] - loss: 11.889 - mae: 85.555 - mean_q: -112.977 Interval 9331 (4665000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2254 9 episodes - episode_reward: -184.935 [-265.371, -140.180] - loss: 11.040 - mae: 85.538 - mean_q: -112.960 Interval 9332 (4665500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1760 6 episodes - episode_reward: -172.311 [-298.160, -35.081] - loss: 14.922 - mae: 85.543 - mean_q: -112.933 Interval 9333 (4666000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.7723 11 episodes - episode_reward: -175.441 [-267.900, -100.000] - loss: 11.802 - mae: 85.532 - mean_q: -112.888 Interval 9334 (4666500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6221 7 episodes - episode_reward: -182.172 [-248.611, -137.936] - loss: 10.743 - mae: 85.490 - mean_q: -112.863 Interval 9335 (4667000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7187 11 episodes - episode_reward: -171.960 [-354.014, -100.000] - loss: 9.936 - mae: 85.458 - mean_q: -112.840 Interval 9336 (4667500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5420 8 episodes - episode_reward: -157.943 [-214.541, -109.617] - loss: 10.432 - mae: 85.443 - mean_q: -112.837 Interval 9337 (4668000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9309 8 episodes - episode_reward: -182.427 [-241.190, -140.160] - loss: 12.193 - mae: 85.431 - mean_q: -112.832 Interval 9338 (4668500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6622 8 episodes - episode_reward: -172.471 [-280.848, -15.897] - loss: 13.631 - mae: 85.403 - mean_q: -112.793 Interval 9339 (4669000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4741 8 episodes - episode_reward: -154.110 [-231.691, -30.488] - loss: 10.458 - mae: 85.359 - mean_q: -112.760 Interval 9340 (4669500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9352 7 episodes - episode_reward: -201.730 [-273.335, -142.964] - loss: 9.845 - mae: 85.331 - mean_q: -112.760 Interval 9341 (4670000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.9905 6 episodes - episode_reward: -186.290 [-320.578, -140.765] - loss: 8.441 - mae: 85.291 - mean_q: -112.761 Interval 9342 (4670500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.5083 7 episodes - episode_reward: -378.392 [-1097.309, 25.155] - loss: 11.189 - mae: 85.299 - mean_q: -112.764 Interval 9343 (4671000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0683 7 episodes - episode_reward: -227.847 [-334.654, -160.326] - loss: 12.134 - mae: 85.280 - mean_q: -112.767 Interval 9344 (4671500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9582 8 episodes - episode_reward: -179.150 [-299.949, -44.465] - loss: 11.453 - mae: 85.271 - mean_q: -112.749 Interval 9345 (4672000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8757 9 episodes - episode_reward: -162.117 [-216.826, -100.000] - loss: 10.070 - mae: 85.274 - mean_q: -112.762 Interval 9346 (4672500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0185 7 episodes - episode_reward: -144.166 [-191.878, -36.278] - loss: 10.750 - mae: 85.272 - mean_q: -112.779 Interval 9347 (4673000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4920 6 episodes - episode_reward: -197.253 [-248.952, -145.911] - loss: 11.967 - mae: 85.296 - mean_q: -112.759 Interval 9348 (4673500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9901 9 episodes - episode_reward: -172.832 [-222.259, -138.626] - loss: 12.689 - mae: 85.316 - mean_q: -112.748 Interval 9349 (4674000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9739 6 episodes - episode_reward: -232.477 [-332.395, -155.860] - loss: 11.270 - mae: 85.303 - mean_q: -112.726 Interval 9350 (4674500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5326 8 episodes - episode_reward: -220.815 [-275.881, -142.766] - loss: 10.949 - mae: 85.312 - mean_q: -112.715 Interval 9351 (4675000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2742 9 episodes - episode_reward: -182.898 [-257.853, -127.300] - loss: 12.995 - mae: 85.327 - mean_q: -112.715 Interval 9352 (4675500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2192 10 episodes - episode_reward: -167.982 [-208.783, -100.000] - loss: 9.663 - mae: 85.295 - mean_q: -112.735 Interval 9353 (4676000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5512 8 episodes - episode_reward: -158.457 [-213.725, -120.495] - loss: 13.352 - mae: 85.332 - mean_q: -112.728 Interval 9354 (4676500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3258 8 episodes - episode_reward: -211.019 [-282.144, -137.824] - loss: 9.535 - mae: 85.311 - mean_q: -112.701 Interval 9355 (4677000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9799 6 episodes - episode_reward: -179.574 [-246.076, -129.087] - loss: 9.301 - mae: 85.307 - mean_q: -112.727 Interval 9356 (4677500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8267 8 episodes - episode_reward: -161.256 [-290.615, 58.477] - loss: 12.821 - mae: 85.325 - mean_q: -112.719 Interval 9357 (4678000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9282 7 episodes - episode_reward: -206.683 [-269.662, -122.584] - loss: 10.958 - mae: 85.328 - mean_q: -112.701 Interval 9358 (4678500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2309 8 episodes - episode_reward: -202.517 [-289.023, -129.504] - loss: 10.914 - mae: 85.324 - mean_q: -112.690 Interval 9359 (4679000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6683 9 episodes - episode_reward: -151.652 [-312.271, -92.175] - loss: 15.272 - mae: 85.340 - mean_q: -112.657 Interval 9360 (4679500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5840 7 episodes - episode_reward: -181.842 [-291.204, -124.256] - loss: 10.262 - mae: 85.302 - mean_q: -112.649 Interval 9361 (4680000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3880 7 episodes - episode_reward: -162.169 [-313.254, 29.120] - loss: 14.603 - mae: 85.328 - mean_q: -112.642 Interval 9362 (4680500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3289 9 episodes - episode_reward: -194.184 [-296.719, -127.211] - loss: 12.957 - mae: 85.294 - mean_q: -112.622 Interval 9363 (4681000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9447 7 episodes - episode_reward: -208.674 [-287.915, -137.236] - loss: 10.771 - mae: 85.288 - mean_q: -112.583 Interval 9364 (4681500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2639 9 episodes - episode_reward: -173.118 [-226.039, -144.267] - loss: 14.735 - mae: 85.308 - mean_q: -112.545 Interval 9365 (4682000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9332 9 episodes - episode_reward: -174.398 [-302.542, -10.029] - loss: 10.027 - mae: 85.266 - mean_q: -112.524 Interval 9366 (4682500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4648 6 episodes - episode_reward: -200.855 [-264.916, -147.951] - loss: 12.066 - mae: 85.268 - mean_q: -112.529 Interval 9367 (4683000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3622 10 episodes - episode_reward: -166.399 [-205.292, -111.016] - loss: 8.043 - mae: 85.249 - mean_q: -112.535 Interval 9368 (4683500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3271 7 episodes - episode_reward: -159.721 [-231.603, -105.216] - loss: 12.558 - mae: 85.266 - mean_q: -112.554 Interval 9369 (4684000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8969 7 episodes - episode_reward: -217.273 [-287.274, -151.310] - loss: 9.660 - mae: 85.262 - mean_q: -112.552 Interval 9370 (4684500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3349 9 episodes - episode_reward: -183.664 [-241.476, -100.000] - loss: 11.212 - mae: 85.268 - mean_q: -112.538 Interval 9371 (4685000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1500 6 episodes - episode_reward: -175.273 [-274.624, -115.051] - loss: 13.746 - mae: 85.283 - mean_q: -112.531 Interval 9372 (4685500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2121 7 episodes - episode_reward: -151.747 [-252.265, -42.105] - loss: 9.876 - mae: 85.255 - mean_q: -112.517 Interval 9373 (4686000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0839 8 episodes - episode_reward: -205.002 [-288.825, -100.000] - loss: 10.464 - mae: 85.245 - mean_q: -112.527 Interval 9374 (4686500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2232 7 episodes - episode_reward: -158.209 [-209.997, -59.737] - loss: 9.847 - mae: 85.234 - mean_q: -112.541 Interval 9375 (4687000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0818 7 episodes - episode_reward: -148.434 [-185.494, -58.797] - loss: 9.603 - mae: 85.241 - mean_q: -112.544 Interval 9376 (4687500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4969 6 episodes - episode_reward: -200.735 [-256.005, -142.129] - loss: 10.740 - mae: 85.242 - mean_q: -112.533 Interval 9377 (4688000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4222 7 episodes - episode_reward: -164.698 [-218.054, -142.428] - loss: 7.385 - mae: 85.229 - mean_q: -112.536 Interval 9378 (4688500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1929 9 episodes - episode_reward: -189.408 [-261.750, -129.144] - loss: 10.182 - mae: 85.252 - mean_q: -112.527 Interval 9379 (4689000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2641 5 episodes - episode_reward: -321.518 [-532.096, -182.803] - loss: 10.007 - mae: 85.240 - mean_q: -112.517 Interval 9380 (4689500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0052 7 episodes - episode_reward: -213.647 [-263.647, -142.512] - loss: 8.165 - mae: 85.239 - mean_q: -112.515 Interval 9381 (4690000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0118 6 episodes - episode_reward: -159.841 [-246.202, -42.057] - loss: 9.750 - mae: 85.235 - mean_q: -112.538 Interval 9382 (4690500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5534 8 episodes - episode_reward: -169.653 [-208.351, -126.651] - loss: 10.342 - mae: 85.240 - mean_q: -112.554 Interval 9383 (4691000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5659 8 episodes - episode_reward: -151.913 [-221.565, -31.193] - loss: 11.602 - mae: 85.249 - mean_q: -112.547 Interval 9384 (4691500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9626 8 episodes - episode_reward: -180.903 [-281.328, -103.606] - loss: 11.835 - mae: 85.250 - mean_q: -112.527 Interval 9385 (4692000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1668 8 episodes - episode_reward: -208.246 [-388.679, -133.353] - loss: 11.185 - mae: 85.244 - mean_q: -112.518 Interval 9386 (4692500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5206 7 episodes - episode_reward: -164.817 [-260.419, 36.433] - loss: 11.283 - mae: 85.234 - mean_q: -112.532 Interval 9387 (4693000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8900 7 episodes - episode_reward: -217.838 [-300.440, -127.432] - loss: 13.037 - mae: 85.254 - mean_q: -112.520 Interval 9388 (4693500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1868 7 episodes - episode_reward: -231.811 [-295.425, -176.710] - loss: 10.456 - mae: 85.239 - mean_q: -112.516 Interval 9389 (4694000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9123 7 episodes - episode_reward: -205.313 [-285.289, -132.243] - loss: 12.922 - mae: 85.244 - mean_q: -112.506 Interval 9390 (4694500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9922 9 episodes - episode_reward: -169.948 [-315.010, -40.875] - loss: 8.871 - mae: 85.231 - mean_q: -112.498 Interval 9391 (4695000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0388 8 episodes - episode_reward: -184.766 [-261.200, -100.000] - loss: 10.943 - mae: 85.250 - mean_q: -112.484 Interval 9392 (4695500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6351 8 episodes - episode_reward: -169.654 [-216.184, -92.973] - loss: 8.890 - mae: 85.234 - mean_q: -112.497 Interval 9393 (4696000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1666 7 episodes - episode_reward: -228.126 [-305.858, -133.346] - loss: 9.714 - mae: 85.236 - mean_q: -112.490 Interval 9394 (4696500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7161 7 episodes - episode_reward: -190.708 [-261.538, -128.459] - loss: 9.108 - mae: 85.246 - mean_q: -112.524 Interval 9395 (4697000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5931 7 episodes - episode_reward: -183.840 [-232.174, -118.662] - loss: 8.634 - mae: 85.264 - mean_q: -112.563 Interval 9396 (4697500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.7574 10 episodes - episode_reward: -184.335 [-253.465, -106.932] - loss: 8.368 - mae: 85.261 - mean_q: -112.573 Interval 9397 (4698000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7066 7 episodes - episode_reward: -183.317 [-221.211, -127.211] - loss: 13.289 - mae: 85.278 - mean_q: -112.586 Interval 9398 (4698500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1746 9 episodes - episode_reward: -188.503 [-259.597, -114.759] - loss: 13.944 - mae: 85.300 - mean_q: -112.584 Interval 9399 (4699000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9432 8 episodes - episode_reward: -181.110 [-287.145, -100.000] - loss: 8.951 - mae: 85.289 - mean_q: -112.578 Interval 9400 (4699500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2947 8 episodes - episode_reward: -210.419 [-392.718, -127.917] - loss: 14.758 - mae: 85.300 - mean_q: -112.578 Interval 9401 (4700000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5630 8 episodes - episode_reward: -155.606 [-199.018, -114.045] - loss: 12.230 - mae: 85.292 - mean_q: -112.574 Interval 9402 (4700500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8799 8 episodes - episode_reward: -184.394 [-273.397, -17.819] - loss: 10.908 - mae: 85.282 - mean_q: -112.572 Interval 9403 (4701000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8499 7 episodes - episode_reward: -186.338 [-239.728, -151.354] - loss: 10.318 - mae: 85.296 - mean_q: -112.612 Interval 9404 (4701500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6622 9 episodes - episode_reward: -158.925 [-225.626, -109.402] - loss: 9.091 - mae: 85.279 - mean_q: -112.624 Interval 9405 (4702000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.7461 6 episodes - episode_reward: -210.147 [-303.124, -120.615] - loss: 12.572 - mae: 85.310 - mean_q: -112.630 Interval 9406 (4702500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4378 8 episodes - episode_reward: -167.217 [-256.733, -3.369] - loss: 11.624 - mae: 85.322 - mean_q: -112.601 Interval 9407 (4703000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6083 8 episodes - episode_reward: -163.622 [-229.134, -109.643] - loss: 12.857 - mae: 85.332 - mean_q: -112.600 Interval 9408 (4703500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6974 6 episodes - episode_reward: -122.148 [-191.886, 61.395] - loss: 12.718 - mae: 85.342 - mean_q: -112.579 Interval 9409 (4704000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2545 10 episodes - episode_reward: -169.848 [-240.531, -80.778] - loss: 10.844 - mae: 85.332 - mean_q: -112.589 Interval 9410 (4704500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9591 9 episodes - episode_reward: -165.165 [-179.069, -147.650] - loss: 13.780 - mae: 85.336 - mean_q: -112.597 Interval 9411 (4705000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9814 9 episodes - episode_reward: -167.738 [-313.209, -100.000] - loss: 10.813 - mae: 85.293 - mean_q: -112.592 Interval 9412 (4705500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1685 9 episodes - episode_reward: -179.538 [-238.811, -131.536] - loss: 9.384 - mae: 85.249 - mean_q: -112.597 Interval 9413 (4706000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6501 8 episodes - episode_reward: -160.676 [-231.998, -33.436] - loss: 8.715 - mae: 85.219 - mean_q: -112.630 Interval 9414 (4706500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7997 7 episodes - episode_reward: -197.522 [-268.961, -158.346] - loss: 11.223 - mae: 85.202 - mean_q: -112.661 Interval 9415 (4707000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5368 7 episodes - episode_reward: -181.307 [-221.584, -142.465] - loss: 12.446 - mae: 85.172 - mean_q: -112.630 Interval 9416 (4707500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.8985 5 episodes - episode_reward: -472.411 [-1353.339, -136.517] - loss: 11.858 - mae: 85.134 - mean_q: -112.576 Interval 9417 (4708000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.0541 9 episodes - episode_reward: -232.768 [-661.391, -40.463] - loss: 11.543 - mae: 85.137 - mean_q: -112.587 Interval 9418 (4708500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9514 8 episodes - episode_reward: -132.053 [-217.765, 8.954] - loss: 10.943 - mae: 85.143 - mean_q: -112.604 Interval 9419 (4709000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6631 7 episodes - episode_reward: -171.556 [-293.952, -110.857] - loss: 10.636 - mae: 85.152 - mean_q: -112.594 Interval 9420 (4709500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2349 8 episodes - episode_reward: -217.210 [-344.096, -160.056] - loss: 10.951 - mae: 85.174 - mean_q: -112.597 Interval 9421 (4710000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0708 7 episodes - episode_reward: -144.258 [-209.332, 33.077] - loss: 12.535 - mae: 85.188 - mean_q: -112.574 Interval 9422 (4710500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4898 7 episodes - episode_reward: -177.821 [-289.816, -58.229] - loss: 9.372 - mae: 85.161 - mean_q: -112.563 Interval 9423 (4711000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3702 6 episodes - episode_reward: -189.147 [-211.940, -148.400] - loss: 8.482 - mae: 85.139 - mean_q: -112.575 Interval 9424 (4711500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7046 8 episodes - episode_reward: -179.447 [-265.009, -8.959] - loss: 15.466 - mae: 85.164 - mean_q: -112.558 Interval 9425 (4712000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6208 7 episodes - episode_reward: -180.583 [-241.717, -90.511] - loss: 10.735 - mae: 85.135 - mean_q: -112.530 Interval 9426 (4712500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6524 9 episodes - episode_reward: -151.071 [-239.951, -100.000] - loss: 10.505 - mae: 85.155 - mean_q: -112.534 Interval 9427 (4713000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3337 8 episodes - episode_reward: -190.282 [-359.237, -100.000] - loss: 9.625 - mae: 85.128 - mean_q: -112.525 Interval 9428 (4713500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0125 7 episodes - episode_reward: -236.381 [-367.562, -168.857] - loss: 9.434 - mae: 85.130 - mean_q: -112.520 Interval 9429 (4714000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7367 7 episodes - episode_reward: -178.478 [-241.440, -52.048] - loss: 11.773 - mae: 85.129 - mean_q: -112.507 Interval 9430 (4714500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3513 10 episodes - episode_reward: -180.369 [-267.708, -106.266] - loss: 11.194 - mae: 85.140 - mean_q: -112.492 Interval 9431 (4715000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9285 6 episodes - episode_reward: -157.260 [-197.158, -22.256] - loss: 9.401 - mae: 85.134 - mean_q: -112.485 Interval 9432 (4715500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6789 7 episodes - episode_reward: -180.196 [-302.535, -76.107] - loss: 10.410 - mae: 85.132 - mean_q: -112.466 Interval 9433 (4716000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8308 6 episodes - episode_reward: -251.590 [-307.385, -143.400] - loss: 11.641 - mae: 85.141 - mean_q: -112.461 Interval 9434 (4716500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0763 7 episodes - episode_reward: -217.507 [-407.794, -135.201] - loss: 9.529 - mae: 85.112 - mean_q: -112.452 Interval 9435 (4717000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4971 7 episodes - episode_reward: -178.913 [-238.497, -128.600] - loss: 13.336 - mae: 85.120 - mean_q: -112.455 Interval 9436 (4717500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0283 7 episodes - episode_reward: -214.931 [-264.354, -179.189] - loss: 14.012 - mae: 85.126 - mean_q: -112.424 Interval 9437 (4718000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3089 8 episodes - episode_reward: -204.332 [-373.657, -126.035] - loss: 8.900 - mae: 85.115 - mean_q: -112.413 Interval 9438 (4718500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9242 8 episodes - episode_reward: -186.294 [-309.248, 20.637] - loss: 10.483 - mae: 85.108 - mean_q: -112.421 Interval 9439 (4719000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3221 7 episodes - episode_reward: -239.895 [-361.748, -177.159] - loss: 11.668 - mae: 85.121 - mean_q: -112.435 Interval 9440 (4719500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7016 7 episodes - episode_reward: -193.325 [-267.972, -147.115] - loss: 8.956 - mae: 85.109 - mean_q: -112.452 Interval 9441 (4720000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4927 7 episodes - episode_reward: -175.327 [-209.159, -140.172] - loss: 11.656 - mae: 85.122 - mean_q: -112.456 Interval 9442 (4720500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5642 7 episodes - episode_reward: -180.145 [-345.952, -114.647] - loss: 11.725 - mae: 85.095 - mean_q: -112.434 Interval 9443 (4721000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9201 7 episodes - episode_reward: -135.325 [-183.141, 2.639] - loss: 9.749 - mae: 85.061 - mean_q: -112.412 Interval 9444 (4721500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2652 9 episodes - episode_reward: -180.099 [-241.164, -149.299] - loss: 9.678 - mae: 85.031 - mean_q: -112.434 Interval 9445 (4722000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2910 10 episodes - episode_reward: -169.922 [-218.491, -103.145] - loss: 8.308 - mae: 85.004 - mean_q: -112.441 Interval 9446 (4722500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9678 6 episodes - episode_reward: -159.906 [-394.014, 75.052] - loss: 11.954 - mae: 84.994 - mean_q: -112.451 Interval 9447 (4723000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2598 7 episodes - episode_reward: -168.567 [-239.627, -110.167] - loss: 12.477 - mae: 84.969 - mean_q: -112.413 Interval 9448 (4723500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3305 8 episodes - episode_reward: -137.724 [-233.098, 6.492] - loss: 13.598 - mae: 84.952 - mean_q: -112.381 Interval 9449 (4724000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6796 8 episodes - episode_reward: -169.445 [-291.549, -62.175] - loss: 13.046 - mae: 84.937 - mean_q: -112.358 Interval 9450 (4724500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1452 10 episodes - episode_reward: -157.808 [-294.174, -92.103] - loss: 12.122 - mae: 84.918 - mean_q: -112.328 Interval 9451 (4725000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3003 8 episodes - episode_reward: -207.043 [-258.297, -100.000] - loss: 10.849 - mae: 84.886 - mean_q: -112.332 Interval 9452 (4725500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9018 8 episodes - episode_reward: -179.300 [-233.616, -124.531] - loss: 10.011 - mae: 84.864 - mean_q: -112.336 Interval 9453 (4726000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3588 7 episodes - episode_reward: -172.261 [-233.375, -48.239] - loss: 16.445 - mae: 84.877 - mean_q: -112.342 Interval 9454 (4726500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7996 7 episodes - episode_reward: -176.244 [-224.961, -149.547] - loss: 15.651 - mae: 84.855 - mean_q: -112.290 Interval 9455 (4727000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9512 8 episodes - episode_reward: -191.188 [-250.293, -139.255] - loss: 11.792 - mae: 84.797 - mean_q: -112.269 Interval 9456 (4727500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6563 8 episodes - episode_reward: -177.067 [-245.411, -56.334] - loss: 14.916 - mae: 84.809 - mean_q: -112.260 Interval 9457 (4728000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0543 9 episodes - episode_reward: -171.993 [-216.118, -125.978] - loss: 8.804 - mae: 84.783 - mean_q: -112.227 Interval 9458 (4728500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7045 10 episodes - episode_reward: -186.608 [-272.916, -100.000] - loss: 10.438 - mae: 84.763 - mean_q: -112.215 Interval 9459 (4729000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6443 9 episodes - episode_reward: -145.346 [-208.409, 13.292] - loss: 14.773 - mae: 84.783 - mean_q: -112.202 Interval 9460 (4729500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9852 7 episodes - episode_reward: -209.140 [-307.258, -120.438] - loss: 12.115 - mae: 84.772 - mean_q: -112.179 Interval 9461 (4730000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2782 5 episodes - episode_reward: -220.188 [-278.672, -168.812] - loss: 11.192 - mae: 84.751 - mean_q: -112.178 Interval 9462 (4730500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0155 8 episodes - episode_reward: -191.123 [-343.364, -132.518] - loss: 11.610 - mae: 84.728 - mean_q: -112.138 Interval 9463 (4731000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7151 8 episodes - episode_reward: -176.830 [-206.824, -150.126] - loss: 10.666 - mae: 84.710 - mean_q: -112.115 Interval 9464 (4731500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5217 9 episodes - episode_reward: -135.730 [-235.067, -23.388] - loss: 11.839 - mae: 84.704 - mean_q: -112.095 Interval 9465 (4732000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6839 7 episodes - episode_reward: -187.806 [-233.543, -142.760] - loss: 9.933 - mae: 84.679 - mean_q: -112.094 Interval 9466 (4732500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0508 8 episodes - episode_reward: -198.999 [-240.492, -162.537] - loss: 13.650 - mae: 84.695 - mean_q: -112.089 Interval 9467 (4733000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9359 8 episodes - episode_reward: -179.783 [-234.852, -115.472] - loss: 13.686 - mae: 84.679 - mean_q: -112.053 Interval 9468 (4733500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9618 8 episodes - episode_reward: -181.594 [-233.342, -124.163] - loss: 11.305 - mae: 84.649 - mean_q: -112.046 Interval 9469 (4734000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9802 8 episodes - episode_reward: -189.497 [-235.314, -154.209] - loss: 15.027 - mae: 84.631 - mean_q: -112.017 Interval 9470 (4734500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9859 8 episodes - episode_reward: -174.577 [-210.554, -139.150] - loss: 11.707 - mae: 84.602 - mean_q: -111.982 Interval 9471 (4735000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8791 8 episodes - episode_reward: -190.760 [-297.137, -51.905] - loss: 12.665 - mae: 84.604 - mean_q: -111.987 Interval 9472 (4735500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5661 7 episodes - episode_reward: -181.399 [-275.375, -145.863] - loss: 19.209 - mae: 84.614 - mean_q: -111.961 Interval 9473 (4736000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2817 9 episodes - episode_reward: -187.714 [-240.683, -140.036] - loss: 13.916 - mae: 84.592 - mean_q: -111.912 Interval 9474 (4736500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9552 8 episodes - episode_reward: -174.316 [-220.843, -100.000] - loss: 12.523 - mae: 84.550 - mean_q: -111.893 Interval 9475 (4737000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9360 9 episodes - episode_reward: -172.265 [-217.104, -39.617] - loss: 11.005 - mae: 84.547 - mean_q: -111.876 Interval 9476 (4737500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1689 7 episodes - episode_reward: -149.800 [-218.061, -9.131] - loss: 14.174 - mae: 84.531 - mean_q: -111.878 Interval 9477 (4738000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4943 9 episodes - episode_reward: -143.336 [-202.874, 14.244] - loss: 12.775 - mae: 84.506 - mean_q: -111.877 Interval 9478 (4738500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8593 9 episodes - episode_reward: -208.962 [-328.826, -100.000] - loss: 14.602 - mae: 84.503 - mean_q: -111.871 Interval 9479 (4739000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5844 7 episodes - episode_reward: -179.015 [-268.531, 4.553] - loss: 13.097 - mae: 84.461 - mean_q: -111.845 Interval 9480 (4739500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.6826 6 episodes - episode_reward: -154.952 [-264.580, 2.489] - loss: 13.496 - mae: 84.447 - mean_q: -111.822 Interval 9481 (4740000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.2355 8 episodes - episode_reward: -200.481 [-269.272, -141.039] - loss: 10.795 - mae: 84.430 - mean_q: -111.811 Interval 9482 (4740500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6048 8 episodes - episode_reward: -163.945 [-243.746, -131.062] - loss: 9.823 - mae: 84.415 - mean_q: -111.816 Interval 9483 (4741000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4338 6 episodes - episode_reward: -196.916 [-283.799, -149.005] - loss: 12.943 - mae: 84.433 - mean_q: -111.800 Interval 9484 (4741500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7333 8 episodes - episode_reward: -174.449 [-251.223, -89.319] - loss: 10.761 - mae: 84.408 - mean_q: -111.811 Interval 9485 (4742000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0776 8 episodes - episode_reward: -182.666 [-253.002, -137.212] - loss: 13.074 - mae: 84.416 - mean_q: -111.785 Interval 9486 (4742500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9349 7 episodes - episode_reward: -208.194 [-282.409, -152.000] - loss: 10.393 - mae: 84.391 - mean_q: -111.804 Interval 9487 (4743000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0580 8 episodes - episode_reward: -199.762 [-287.056, -7.176] - loss: 11.905 - mae: 84.392 - mean_q: -111.786 Interval 9488 (4743500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -6.6795 7 episodes - episode_reward: -427.115 [-888.204, -187.586] - loss: 10.431 - mae: 84.378 - mean_q: -111.780 Interval 9489 (4744000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3845 9 episodes - episode_reward: -227.431 [-513.396, -144.526] - loss: 8.445 - mae: 84.392 - mean_q: -111.795 Interval 9490 (4744500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1042 7 episodes - episode_reward: -140.151 [-193.916, -10.633] - loss: 11.539 - mae: 84.409 - mean_q: -111.787 Interval 9491 (4745000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8380 8 episodes - episode_reward: -183.691 [-258.137, -144.301] - loss: 9.316 - mae: 84.422 - mean_q: -111.792 Interval 9492 (4745500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0470 7 episodes - episode_reward: -205.535 [-243.292, -136.552] - loss: 13.129 - mae: 84.472 - mean_q: -111.761 Interval 9493 (4746000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7293 8 episodes - episode_reward: -243.055 [-403.068, -142.411] - loss: 12.835 - mae: 84.481 - mean_q: -111.732 Interval 9494 (4746500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0418 7 episodes - episode_reward: -220.227 [-344.871, -136.705] - loss: 13.687 - mae: 84.488 - mean_q: -111.715 Interval 9495 (4747000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1200 7 episodes - episode_reward: -157.668 [-276.065, -26.127] - loss: 10.251 - mae: 84.498 - mean_q: -111.724 Interval 9496 (4747500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3817 7 episodes - episode_reward: -166.037 [-219.006, -93.492] - loss: 11.220 - mae: 84.517 - mean_q: -111.740 Interval 9497 (4748000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7696 7 episodes - episode_reward: -192.302 [-229.242, -140.684] - loss: 10.392 - mae: 84.518 - mean_q: -111.728 Interval 9498 (4748500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6519 7 episodes - episode_reward: -188.136 [-293.339, -133.766] - loss: 11.994 - mae: 84.531 - mean_q: -111.725 Interval 9499 (4749000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8440 9 episodes - episode_reward: -216.759 [-299.324, -144.738] - loss: 12.946 - mae: 84.536 - mean_q: -111.732 Interval 9500 (4749500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7469 7 episodes - episode_reward: -180.679 [-213.048, -83.949] - loss: 13.342 - mae: 84.552 - mean_q: -111.731 Interval 9501 (4750000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3329 9 episodes - episode_reward: -197.046 [-237.351, -171.441] - loss: 10.576 - mae: 84.553 - mean_q: -111.744 Interval 9502 (4750500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8801 9 episodes - episode_reward: -156.661 [-192.298, -100.000] - loss: 15.401 - mae: 84.593 - mean_q: -111.701 Interval 9503 (4751000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5878 10 episodes - episode_reward: -178.783 [-242.482, -108.253] - loss: 10.534 - mae: 84.571 - mean_q: -111.662 Interval 9504 (4751500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6194 8 episodes - episode_reward: -172.915 [-210.693, -142.311] - loss: 10.930 - mae: 84.561 - mean_q: -111.689 Interval 9505 (4752000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6885 7 episodes - episode_reward: -188.329 [-262.166, -117.561] - loss: 15.290 - mae: 84.589 - mean_q: -111.682 Interval 9506 (4752500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1934 7 episodes - episode_reward: -156.615 [-256.480, -63.989] - loss: 14.758 - mae: 84.586 - mean_q: -111.668 Interval 9507 (4753000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8289 8 episodes - episode_reward: -167.302 [-197.407, -100.000] - loss: 15.403 - mae: 84.588 - mean_q: -111.655 Interval 9508 (4753500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1398 9 episodes - episode_reward: -186.502 [-288.172, -105.371] - loss: 14.383 - mae: 84.580 - mean_q: -111.639 Interval 9509 (4754000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9173 7 episodes - episode_reward: -204.556 [-296.760, -117.746] - loss: 14.088 - mae: 84.587 - mean_q: -111.625 Interval 9510 (4754500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9036 8 episodes - episode_reward: -176.522 [-226.823, -57.521] - loss: 12.638 - mae: 84.579 - mean_q: -111.611 Interval 9511 (4755000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3540 7 episodes - episode_reward: -174.729 [-293.678, -54.409] - loss: 12.050 - mae: 84.569 - mean_q: -111.616 Interval 9512 (4755500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3222 9 episodes - episode_reward: -184.150 [-226.148, -111.712] - loss: 10.800 - mae: 84.554 - mean_q: -111.614 Interval 9513 (4756000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.3901 10 episodes - episode_reward: -168.487 [-255.430, -93.427] - loss: 14.480 - mae: 84.571 - mean_q: -111.616 Interval 9514 (4756500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6398 7 episodes - episode_reward: -179.855 [-216.782, -112.907] - loss: 16.296 - mae: 84.562 - mean_q: -111.593 Interval 9515 (4757000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5229 7 episodes - episode_reward: -192.544 [-284.929, -126.820] - loss: 15.835 - mae: 84.532 - mean_q: -111.570 Interval 9516 (4757500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5280 7 episodes - episode_reward: -179.327 [-283.042, -45.483] - loss: 13.127 - mae: 84.487 - mean_q: -111.568 Interval 9517 (4758000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9099 8 episodes - episode_reward: -180.206 [-255.362, -134.678] - loss: 11.594 - mae: 84.441 - mean_q: -111.583 Interval 9518 (4758500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.0170 4 episodes - episode_reward: -274.560 [-490.387, -158.317] - loss: 11.536 - mae: 84.416 - mean_q: -111.575 Interval 9519 (4759000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.5703 5 episodes - episode_reward: -936.473 [-2775.615, -368.363] - loss: 8.840 - mae: 84.415 - mean_q: -111.598 Interval 9520 (4759500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8842 8 episodes - episode_reward: -176.485 [-268.237, -100.000] - loss: 11.842 - mae: 84.473 - mean_q: -111.639 Interval 9521 (4760000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5538 7 episodes - episode_reward: -186.399 [-252.494, 10.770] - loss: 8.011 - mae: 84.490 - mean_q: -111.666 Interval 9522 (4760500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.4780 8 episodes - episode_reward: -146.879 [-205.171, -85.700] - loss: 10.163 - mae: 84.539 - mean_q: -111.718 Interval 9523 (4761000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3382 7 episodes - episode_reward: -175.405 [-247.579, 28.350] - loss: 10.570 - mae: 84.600 - mean_q: -111.753 Interval 9524 (4761500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2978 9 episodes - episode_reward: -188.053 [-247.268, -127.519] - loss: 11.704 - mae: 84.613 - mean_q: -111.791 Interval 9525 (4762000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8904 8 episodes - episode_reward: -161.828 [-229.325, -100.000] - loss: 10.518 - mae: 84.630 - mean_q: -111.844 Interval 9526 (4762500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5981 9 episodes - episode_reward: -156.467 [-252.632, 43.627] - loss: 13.373 - mae: 84.670 - mean_q: -111.871 Interval 9527 (4763000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9842 6 episodes - episode_reward: -158.764 [-273.324, 37.552] - loss: 13.172 - mae: 84.685 - mean_q: -111.862 Interval 9528 (4763500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1438 7 episodes - episode_reward: -150.452 [-223.290, -25.816] - loss: 10.888 - mae: 84.723 - mean_q: -111.880 Interval 9529 (4764000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8528 8 episodes - episode_reward: -185.856 [-265.975, -100.000] - loss: 13.168 - mae: 84.752 - mean_q: -111.878 Interval 9530 (4764500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8042 6 episodes - episode_reward: -240.065 [-338.051, -177.518] - loss: 10.069 - mae: 84.748 - mean_q: -111.882 Interval 9531 (4765000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8840 8 episodes - episode_reward: -174.914 [-220.504, -135.517] - loss: 12.278 - mae: 84.784 - mean_q: -111.900 Interval 9532 (4765500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9606 9 episodes - episode_reward: -169.222 [-233.877, -112.669] - loss: 11.446 - mae: 84.813 - mean_q: -111.900 Interval 9533 (4766000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9752 8 episodes - episode_reward: -181.808 [-246.968, -120.080] - loss: 13.620 - mae: 84.829 - mean_q: -111.908 Interval 9534 (4766500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5637 8 episodes - episode_reward: -167.975 [-238.061, -102.244] - loss: 14.698 - mae: 84.853 - mean_q: -111.905 Interval 9535 (4767000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5520 7 episodes - episode_reward: -175.171 [-217.154, -112.124] - loss: 7.704 - mae: 84.834 - mean_q: -111.923 Interval 9536 (4767500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6675 7 episodes - episode_reward: -183.892 [-403.624, -109.831] - loss: 10.329 - mae: 84.851 - mean_q: -111.952 Interval 9537 (4768000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8658 7 episodes - episode_reward: -197.151 [-256.103, -147.877] - loss: 17.515 - mae: 84.895 - mean_q: -111.937 Interval 9538 (4768500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6158 9 episodes - episode_reward: -205.587 [-340.247, -105.393] - loss: 12.696 - mae: 84.897 - mean_q: -111.903 Interval 9539 (4769000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4940 8 episodes - episode_reward: -160.804 [-224.196, -57.204] - loss: 10.225 - mae: 84.882 - mean_q: -111.890 Interval 9540 (4769500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1548 7 episodes - episode_reward: -150.011 [-297.871, -27.688] - loss: 16.242 - mae: 84.897 - mean_q: -111.903 Interval 9541 (4770000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8376 9 episodes - episode_reward: -217.351 [-386.910, -100.000] - loss: 16.989 - mae: 84.907 - mean_q: -111.865 Interval 9542 (4770500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2330 7 episodes - episode_reward: -158.183 [-197.794, -76.180] - loss: 12.956 - mae: 84.885 - mean_q: -111.829 Interval 9543 (4771000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5038 7 episodes - episode_reward: -173.639 [-225.524, -114.488] - loss: 12.064 - mae: 84.864 - mean_q: -111.824 Interval 9544 (4771500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8774 7 episodes - episode_reward: -210.779 [-275.234, -153.464] - loss: 11.813 - mae: 84.875 - mean_q: -111.831 Interval 9545 (4772000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8966 8 episodes - episode_reward: -177.632 [-267.332, -105.387] - loss: 8.626 - mae: 84.857 - mean_q: -111.845 Interval 9546 (4772500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5962 6 episodes - episode_reward: -221.012 [-328.438, -142.645] - loss: 12.204 - mae: 84.871 - mean_q: -111.858 Interval 9547 (4773000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4224 9 episodes - episode_reward: -194.876 [-279.391, -137.604] - loss: 15.004 - mae: 84.870 - mean_q: -111.858 Interval 9548 (4773500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5441 5 episodes - episode_reward: -244.485 [-423.239, -161.351] - loss: 13.116 - mae: 84.877 - mean_q: -111.839 Interval 9549 (4774000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.2243 9 episodes - episode_reward: -180.811 [-229.465, -129.775] - loss: 10.617 - mae: 84.868 - mean_q: -111.875 Interval 9550 (4774500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2248 8 episodes - episode_reward: -206.672 [-445.244, -140.898] - loss: 12.414 - mae: 84.873 - mean_q: -111.867 Interval 9551 (4775000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3503 6 episodes - episode_reward: -174.393 [-263.245, -127.582] - loss: 9.654 - mae: 84.842 - mean_q: -111.896 Interval 9552 (4775500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4326 8 episodes - episode_reward: -169.668 [-222.223, -145.280] - loss: 10.146 - mae: 84.838 - mean_q: -111.937 Interval 9553 (4776000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7536 8 episodes - episode_reward: -161.807 [-237.519, -113.207] - loss: 14.598 - mae: 84.879 - mean_q: -111.940 Interval 9554 (4776500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8746 8 episodes - episode_reward: -170.545 [-242.772, -100.000] - loss: 13.097 - mae: 84.879 - mean_q: -111.941 Interval 9555 (4777000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9401 9 episodes - episode_reward: -174.085 [-271.550, -107.391] - loss: 9.312 - mae: 84.851 - mean_q: -111.937 Interval 9556 (4777500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8002 9 episodes - episode_reward: -156.069 [-279.270, -31.156] - loss: 7.531 - mae: 84.864 - mean_q: -111.996 Interval 9557 (4778000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8542 8 episodes - episode_reward: -183.514 [-281.215, -123.286] - loss: 10.716 - mae: 84.897 - mean_q: -112.029 Interval 9558 (4778500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6963 7 episodes - episode_reward: -186.545 [-212.667, -151.785] - loss: 12.945 - mae: 84.924 - mean_q: -112.020 Interval 9559 (4779000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3654 7 episodes - episode_reward: -172.912 [-326.660, -113.830] - loss: 10.514 - mae: 84.931 - mean_q: -112.043 Interval 9560 (4779500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3217 7 episodes - episode_reward: -164.778 [-221.501, -100.000] - loss: 12.988 - mae: 84.937 - mean_q: -112.055 Interval 9561 (4780000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0224 9 episodes - episode_reward: -159.153 [-231.641, -100.000] - loss: 12.696 - mae: 84.948 - mean_q: -112.028 Interval 9562 (4780500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1715 9 episodes - episode_reward: -185.576 [-224.526, -143.372] - loss: 13.665 - mae: 84.972 - mean_q: -112.020 Interval 9563 (4781000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4454 7 episodes - episode_reward: -168.336 [-285.928, -1.569] - loss: 15.943 - mae: 84.977 - mean_q: -111.997 Interval 9564 (4781500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3617 9 episodes - episode_reward: -186.583 [-257.636, -143.295] - loss: 10.158 - mae: 84.949 - mean_q: -111.974 Interval 9565 (4782000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9578 7 episodes - episode_reward: -199.306 [-401.149, -147.775] - loss: 13.927 - mae: 84.967 - mean_q: -111.951 Interval 9566 (4782500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8314 8 episodes - episode_reward: -185.607 [-288.584, -138.121] - loss: 15.037 - mae: 84.965 - mean_q: -111.937 Interval 9567 (4783000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7087 8 episodes - episode_reward: -173.739 [-247.672, 14.590] - loss: 12.285 - mae: 84.929 - mean_q: -111.917 Interval 9568 (4783500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8026 8 episodes - episode_reward: -178.673 [-215.009, -127.914] - loss: 11.283 - mae: 84.926 - mean_q: -111.941 Interval 9569 (4784000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5817 6 episodes - episode_reward: -195.012 [-242.172, -143.772] - loss: 14.490 - mae: 84.948 - mean_q: -111.943 Interval 9570 (4784500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9252 9 episodes - episode_reward: -175.496 [-246.187, -100.000] - loss: 10.880 - mae: 84.937 - mean_q: -111.916 Interval 9571 (4785000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9051 9 episodes - episode_reward: -160.398 [-199.499, -100.000] - loss: 9.967 - mae: 84.940 - mean_q: -111.937 Interval 9572 (4785500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2532 7 episodes - episode_reward: -161.541 [-202.902, -31.325] - loss: 11.987 - mae: 84.962 - mean_q: -111.931 Interval 9573 (4786000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6047 8 episodes - episode_reward: -164.541 [-225.531, -109.876] - loss: 11.773 - mae: 84.973 - mean_q: -111.915 Interval 9574 (4786500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1877 9 episodes - episode_reward: -173.263 [-227.131, -100.000] - loss: 11.898 - mae: 84.991 - mean_q: -111.882 Interval 9575 (4787000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0211 10 episodes - episode_reward: -154.722 [-210.113, -100.000] - loss: 13.754 - mae: 84.990 - mean_q: -111.850 Interval 9576 (4787500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9468 7 episodes - episode_reward: -197.291 [-267.804, -147.767] - loss: 10.347 - mae: 84.959 - mean_q: -111.841 Interval 9577 (4788000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0381 9 episodes - episode_reward: -178.237 [-249.489, -102.656] - loss: 9.953 - mae: 84.957 - mean_q: -111.844 Interval 9578 (4788500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5154 7 episodes - episode_reward: -177.260 [-321.854, -135.730] - loss: 10.468 - mae: 84.942 - mean_q: -111.851 Interval 9579 (4789000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9603 9 episodes - episode_reward: -162.966 [-231.431, -100.000] - loss: 10.425 - mae: 84.930 - mean_q: -111.840 Interval 9580 (4789500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3231 6 episodes - episode_reward: -179.305 [-261.672, -127.569] - loss: 10.023 - mae: 84.913 - mean_q: -111.854 Interval 9581 (4790000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9334 8 episodes - episode_reward: -194.316 [-311.654, -103.854] - loss: 11.729 - mae: 84.913 - mean_q: -111.855 Interval 9582 (4790500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2385 6 episodes - episode_reward: -179.248 [-231.680, -141.373] - loss: 12.131 - mae: 84.919 - mean_q: -111.834 Interval 9583 (4791000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3275 9 episodes - episode_reward: -187.229 [-252.892, -141.265] - loss: 10.029 - mae: 84.913 - mean_q: -111.844 Interval 9584 (4791500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6072 8 episodes - episode_reward: -168.545 [-225.749, -135.364] - loss: 12.424 - mae: 84.916 - mean_q: -111.849 Interval 9585 (4792000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4624 11 episodes - episode_reward: -152.636 [-253.504, -116.175] - loss: 11.668 - mae: 84.905 - mean_q: -111.834 Interval 9586 (4792500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2781 8 episodes - episode_reward: -152.646 [-221.919, 26.114] - loss: 12.731 - mae: 84.894 - mean_q: -111.835 Interval 9587 (4793000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9810 6 episodes - episode_reward: -158.651 [-197.429, -122.698] - loss: 15.764 - mae: 84.885 - mean_q: -111.785 Interval 9588 (4793500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7971 8 episodes - episode_reward: -172.114 [-243.801, -128.505] - loss: 10.359 - mae: 84.841 - mean_q: -111.777 Interval 9589 (4794000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4942 7 episodes - episode_reward: -175.138 [-289.840, -100.000] - loss: 11.629 - mae: 84.810 - mean_q: -111.774 Interval 9590 (4794500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8586 7 episodes - episode_reward: -202.102 [-383.660, -136.271] - loss: 9.914 - mae: 84.768 - mean_q: -111.762 Interval 9591 (4795000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4761 7 episodes - episode_reward: -177.333 [-235.580, -121.561] - loss: 10.168 - mae: 84.746 - mean_q: -111.767 Interval 9592 (4795500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6398 7 episodes - episode_reward: -198.596 [-308.020, -163.803] - loss: 10.746 - mae: 84.713 - mean_q: -111.757 Interval 9593 (4796000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4258 9 episodes - episode_reward: -134.662 [-180.182, -100.000] - loss: 10.372 - mae: 84.681 - mean_q: -111.729 Interval 9594 (4796500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8677 9 episodes - episode_reward: -147.147 [-247.440, -95.541] - loss: 7.795 - mae: 84.644 - mean_q: -111.728 Interval 9595 (4797000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5419 10 episodes - episode_reward: -186.964 [-305.095, -135.017] - loss: 11.552 - mae: 84.644 - mean_q: -111.725 Interval 9596 (4797500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2157 8 episodes - episode_reward: -202.070 [-254.265, -140.289] - loss: 9.957 - mae: 84.631 - mean_q: -111.683 Interval 9597 (4798000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2653 7 episodes - episode_reward: -150.763 [-309.629, -1.926] - loss: 11.614 - mae: 84.621 - mean_q: -111.674 Interval 9598 (4798500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3363 8 episodes - episode_reward: -155.454 [-206.114, -6.575] - loss: 15.126 - mae: 84.618 - mean_q: -111.637 Interval 9599 (4799000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8180 8 episodes - episode_reward: -174.421 [-242.887, -130.349] - loss: 11.031 - mae: 84.579 - mean_q: -111.581 Interval 9600 (4799500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1493 8 episodes - episode_reward: -136.994 [-212.088, 47.429] - loss: 9.978 - mae: 84.532 - mean_q: -111.552 Interval 9601 (4800000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7477 9 episodes - episode_reward: -147.932 [-229.436, -65.595] - loss: 10.106 - mae: 84.518 - mean_q: -111.546 Interval 9602 (4800500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4159 8 episodes - episode_reward: -154.399 [-180.707, -95.518] - loss: 8.405 - mae: 84.490 - mean_q: -111.520 Interval 9603 (4801000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9935 7 episodes - episode_reward: -207.740 [-273.499, -126.993] - loss: 8.844 - mae: 84.462 - mean_q: -111.516 Interval 9604 (4801500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9941 8 episodes - episode_reward: -188.219 [-237.850, -144.280] - loss: 10.850 - mae: 84.475 - mean_q: -111.487 Interval 9605 (4802000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1914 8 episodes - episode_reward: -195.969 [-249.367, -142.385] - loss: 12.749 - mae: 84.452 - mean_q: -111.452 Interval 9606 (4802500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0263 8 episodes - episode_reward: -195.135 [-270.461, -100.000] - loss: 11.932 - mae: 84.438 - mean_q: -111.410 Interval 9607 (4803000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1622 9 episodes - episode_reward: -173.536 [-250.698, -90.694] - loss: 14.042 - mae: 84.412 - mean_q: -111.369 Interval 9608 (4803500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3529 10 episodes - episode_reward: -119.172 [-185.921, -43.936] - loss: 11.862 - mae: 84.376 - mean_q: -111.327 Interval 9609 (4804000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1217 6 episodes - episode_reward: -175.822 [-201.624, -135.346] - loss: 11.061 - mae: 84.357 - mean_q: -111.293 Interval 9610 (4804500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4216 7 episodes - episode_reward: -170.857 [-226.569, -125.005] - loss: 13.767 - mae: 84.335 - mean_q: -111.241 Interval 9611 (4805000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1500 9 episodes - episode_reward: -177.269 [-217.805, -138.745] - loss: 9.355 - mae: 84.291 - mean_q: -111.237 Interval 9612 (4805500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.0821 8 episodes - episode_reward: -194.259 [-281.921, -120.340] - loss: 9.553 - mae: 84.247 - mean_q: -111.245 Interval 9613 (4806000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7256 7 episodes - episode_reward: -184.213 [-262.586, -100.000] - loss: 10.647 - mae: 84.233 - mean_q: -111.213 Interval 9614 (4806500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1652 8 episodes - episode_reward: -143.379 [-194.916, -71.721] - loss: 13.187 - mae: 84.226 - mean_q: -111.193 Interval 9615 (4807000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5982 10 episodes - episode_reward: -182.336 [-255.319, -139.442] - loss: 9.859 - mae: 84.185 - mean_q: -111.159 Interval 9616 (4807500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3461 6 episodes - episode_reward: -197.576 [-316.737, -47.246] - loss: 11.343 - mae: 84.176 - mean_q: -111.151 Interval 9617 (4808000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3606 6 episodes - episode_reward: -179.651 [-262.866, -131.363] - loss: 13.382 - mae: 84.168 - mean_q: -111.114 Interval 9618 (4808500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9781 9 episodes - episode_reward: -173.181 [-270.244, -113.788] - loss: 11.160 - mae: 84.097 - mean_q: -111.075 Interval 9619 (4809000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6672 6 episodes - episode_reward: -203.686 [-231.388, -154.232] - loss: 10.253 - mae: 84.040 - mean_q: -111.048 Interval 9620 (4809500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4184 7 episodes - episode_reward: -181.510 [-241.102, -151.802] - loss: 9.418 - mae: 83.975 - mean_q: -111.040 Interval 9621 (4810000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4715 6 episodes - episode_reward: -200.546 [-274.322, -130.016] - loss: 13.232 - mae: 83.931 - mean_q: -111.010 Interval 9622 (4810500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.6716 12 episodes - episode_reward: -156.078 [-213.591, -100.000] - loss: 7.919 - mae: 83.864 - mean_q: -111.002 Interval 9623 (4811000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2388 8 episodes - episode_reward: -146.800 [-287.967, 38.474] - loss: 10.350 - mae: 83.815 - mean_q: -111.006 Interval 9624 (4811500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8852 7 episodes - episode_reward: -197.564 [-337.868, -126.523] - loss: 13.957 - mae: 83.780 - mean_q: -110.967 Interval 9625 (4812000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8995 8 episodes - episode_reward: -185.885 [-280.278, -138.235] - loss: 12.286 - mae: 83.737 - mean_q: -110.936 Interval 9626 (4812500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6138 7 episodes - episode_reward: -167.412 [-306.378, 48.531] - loss: 13.757 - mae: 83.713 - mean_q: -110.897 Interval 9627 (4813000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.5853 7 episodes - episode_reward: -405.657 [-621.823, -207.044] - loss: 9.399 - mae: 83.654 - mean_q: -110.846 Interval 9628 (4813500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7628 6 episodes - episode_reward: -500.616 [-658.072, -353.065] - loss: 13.136 - mae: 83.640 - mean_q: -110.800 Interval 9629 (4814000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0845 7 episodes - episode_reward: -221.379 [-380.752, -104.914] - loss: 10.275 - mae: 83.642 - mean_q: -110.811 Interval 9630 (4814500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1442 9 episodes - episode_reward: -168.635 [-226.846, -94.033] - loss: 8.946 - mae: 83.643 - mean_q: -110.818 Interval 9631 (4815000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8211 8 episodes - episode_reward: -170.765 [-233.880, -19.890] - loss: 10.972 - mae: 83.654 - mean_q: -110.808 Interval 9632 (4815500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1751 7 episodes - episode_reward: -228.217 [-376.401, -121.906] - loss: 12.756 - mae: 83.678 - mean_q: -110.791 Interval 9633 (4816000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7334 10 episodes - episode_reward: -189.472 [-272.046, -100.000] - loss: 11.862 - mae: 83.683 - mean_q: -110.769 Interval 9634 (4816500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9628 7 episodes - episode_reward: -219.875 [-256.926, -173.512] - loss: 11.822 - mae: 83.691 - mean_q: -110.761 Interval 9635 (4817000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7040 7 episodes - episode_reward: -167.542 [-209.350, -100.000] - loss: 11.371 - mae: 83.681 - mean_q: -110.736 Interval 9636 (4817500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1608 8 episodes - episode_reward: -218.569 [-343.290, -147.781] - loss: 10.809 - mae: 83.665 - mean_q: -110.766 Interval 9637 (4818000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6804 7 episodes - episode_reward: -191.399 [-295.683, -131.894] - loss: 12.281 - mae: 83.666 - mean_q: -110.788 Interval 9638 (4818500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4238 7 episodes - episode_reward: -170.637 [-207.505, -143.216] - loss: 11.181 - mae: 83.649 - mean_q: -110.763 Interval 9639 (4819000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3255 6 episodes - episode_reward: -187.308 [-233.917, -155.138] - loss: 8.900 - mae: 83.626 - mean_q: -110.751 Interval 9640 (4819500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2946 9 episodes - episode_reward: -188.088 [-253.995, -133.495] - loss: 8.294 - mae: 83.642 - mean_q: -110.763 Interval 9641 (4820000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7051 7 episodes - episode_reward: -180.291 [-235.775, -143.851] - loss: 10.470 - mae: 83.635 - mean_q: -110.771 Interval 9642 (4820500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8923 8 episodes - episode_reward: -180.304 [-268.737, -38.359] - loss: 14.000 - mae: 83.648 - mean_q: -110.764 Interval 9643 (4821000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8262 8 episodes - episode_reward: -178.802 [-253.035, -123.503] - loss: 9.942 - mae: 83.650 - mean_q: -110.746 Interval 9644 (4821500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6438 7 episodes - episode_reward: -200.378 [-250.460, -131.399] - loss: 8.765 - mae: 83.651 - mean_q: -110.751 Interval 9645 (4822000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2191 6 episodes - episode_reward: -168.778 [-205.989, -117.534] - loss: 9.209 - mae: 83.645 - mean_q: -110.771 Interval 9646 (4822500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2125 9 episodes - episode_reward: -179.588 [-244.335, -109.711] - loss: 10.973 - mae: 83.652 - mean_q: -110.762 Interval 9647 (4823000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6117 8 episodes - episode_reward: -171.754 [-224.492, -105.231] - loss: 14.189 - mae: 83.662 - mean_q: -110.725 Interval 9648 (4823500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4890 7 episodes - episode_reward: -175.745 [-309.749, -129.921] - loss: 11.478 - mae: 83.643 - mean_q: -110.702 Interval 9649 (4824000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9231 8 episodes - episode_reward: -183.993 [-220.621, -152.796] - loss: 13.598 - mae: 83.648 - mean_q: -110.672 Interval 9650 (4824500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6305 6 episodes - episode_reward: -218.856 [-328.541, -12.186] - loss: 8.324 - mae: 83.612 - mean_q: -110.644 Interval 9651 (4825000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2421 6 episodes - episode_reward: -185.761 [-393.691, 17.052] - loss: 13.164 - mae: 83.649 - mean_q: -110.614 Interval 9652 (4825500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5902 7 episodes - episode_reward: -186.347 [-209.274, -151.358] - loss: 15.042 - mae: 83.635 - mean_q: -110.572 Interval 9653 (4826000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4675 7 episodes - episode_reward: -169.913 [-263.400, -24.923] - loss: 11.914 - mae: 83.595 - mean_q: -110.556 Interval 9654 (4826500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4060 9 episodes - episode_reward: -196.574 [-276.124, -139.785] - loss: 9.755 - mae: 83.582 - mean_q: -110.550 Interval 9655 (4827000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5068 7 episodes - episode_reward: -183.218 [-282.717, -83.816] - loss: 15.798 - mae: 83.607 - mean_q: -110.521 Interval 9656 (4827500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1059 7 episodes - episode_reward: -214.643 [-302.290, -166.497] - loss: 12.363 - mae: 83.567 - mean_q: -110.496 Interval 9657 (4828000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0330 8 episodes - episode_reward: -184.137 [-255.561, -119.547] - loss: 12.576 - mae: 83.548 - mean_q: -110.483 Interval 9658 (4828500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0115 7 episodes - episode_reward: -216.698 [-371.268, -147.840] - loss: 10.584 - mae: 83.525 - mean_q: -110.468 Interval 9659 (4829000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2835 6 episodes - episode_reward: -192.604 [-253.384, -148.857] - loss: 7.613 - mae: 83.493 - mean_q: -110.484 Interval 9660 (4829500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0734 9 episodes - episode_reward: -176.999 [-313.281, -76.505] - loss: 10.800 - mae: 83.500 - mean_q: -110.491 Interval 9661 (4830000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1333 6 episodes - episode_reward: -172.186 [-220.883, -123.276] - loss: 9.895 - mae: 83.489 - mean_q: -110.497 Interval 9662 (4830500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2783 9 episodes - episode_reward: -180.780 [-217.582, -100.000] - loss: 11.609 - mae: 83.478 - mean_q: -110.504 Interval 9663 (4831000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9831 9 episodes - episode_reward: -168.201 [-202.570, -134.077] - loss: 10.958 - mae: 83.468 - mean_q: -110.514 Interval 9664 (4831500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1612 8 episodes - episode_reward: -188.158 [-226.119, -132.751] - loss: 9.768 - mae: 83.458 - mean_q: -110.521 Interval 9665 (4832000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6471 10 episodes - episode_reward: -144.994 [-313.595, 45.359] - loss: 10.808 - mae: 83.459 - mean_q: -110.537 Interval 9666 (4832500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2872 9 episodes - episode_reward: -179.029 [-293.126, -100.000] - loss: 8.988 - mae: 83.461 - mean_q: -110.534 Interval 9667 (4833000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9536 6 episodes - episode_reward: -239.965 [-472.813, -145.656] - loss: 11.123 - mae: 83.470 - mean_q: -110.544 Interval 9668 (4833500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9165 8 episodes - episode_reward: -188.478 [-244.298, -148.897] - loss: 11.479 - mae: 83.471 - mean_q: -110.526 Interval 9669 (4834000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4349 9 episodes - episode_reward: -184.127 [-239.567, -124.480] - loss: 11.444 - mae: 83.489 - mean_q: -110.543 Interval 9670 (4834500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2802 8 episodes - episode_reward: -192.599 [-245.553, -159.149] - loss: 9.091 - mae: 83.477 - mean_q: -110.548 Interval 9671 (4835000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4107 9 episodes - episode_reward: -203.777 [-315.909, -100.000] - loss: 10.387 - mae: 83.488 - mean_q: -110.547 Interval 9672 (4835500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3757 9 episodes - episode_reward: -178.047 [-277.898, -106.223] - loss: 11.174 - mae: 83.481 - mean_q: -110.555 Interval 9673 (4836000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4012 7 episodes - episode_reward: -182.167 [-353.899, 56.765] - loss: 11.187 - mae: 83.494 - mean_q: -110.546 Interval 9674 (4836500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9094 7 episodes - episode_reward: -191.010 [-221.120, -124.561] - loss: 11.708 - mae: 83.492 - mean_q: -110.537 Interval 9675 (4837000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8677 8 episodes - episode_reward: -187.024 [-380.987, -67.247] - loss: 11.383 - mae: 83.488 - mean_q: -110.551 Interval 9676 (4837500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4642 8 episodes - episode_reward: -223.493 [-351.830, -167.145] - loss: 9.223 - mae: 83.472 - mean_q: -110.583 Interval 9677 (4838000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7188 8 episodes - episode_reward: -162.858 [-210.095, -114.791] - loss: 11.575 - mae: 83.509 - mean_q: -110.589 Interval 9678 (4838500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9588 9 episodes - episode_reward: -171.525 [-234.154, -100.000] - loss: 12.848 - mae: 83.527 - mean_q: -110.595 Interval 9679 (4839000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8537 9 episodes - episode_reward: -161.187 [-193.779, -110.324] - loss: 8.987 - mae: 83.523 - mean_q: -110.608 Interval 9680 (4839500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5287 7 episodes - episode_reward: -172.735 [-248.732, -126.044] - loss: 12.372 - mae: 83.544 - mean_q: -110.611 Interval 9681 (4840000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0550 8 episodes - episode_reward: -192.313 [-278.182, -132.584] - loss: 11.301 - mae: 83.540 - mean_q: -110.611 Interval 9682 (4840500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9843 8 episodes - episode_reward: -187.733 [-231.107, -117.931] - loss: 10.911 - mae: 83.555 - mean_q: -110.635 Interval 9683 (4841000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7734 7 episodes - episode_reward: -180.482 [-220.365, -115.801] - loss: 10.747 - mae: 83.564 - mean_q: -110.648 Interval 9684 (4841500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0536 7 episodes - episode_reward: -234.677 [-271.521, -197.428] - loss: 10.672 - mae: 83.561 - mean_q: -110.686 Interval 9685 (4842000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4948 7 episodes - episode_reward: -178.433 [-229.012, -148.910] - loss: 11.386 - mae: 83.593 - mean_q: -110.714 Interval 9686 (4842500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4697 8 episodes - episode_reward: -146.400 [-190.123, -100.003] - loss: 10.404 - mae: 83.603 - mean_q: -110.734 Interval 9687 (4843000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2560 6 episodes - episode_reward: -195.901 [-245.881, -170.651] - loss: 10.634 - mae: 83.612 - mean_q: -110.744 Interval 9688 (4843500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5520 9 episodes - episode_reward: -201.844 [-553.656, -111.356] - loss: 11.450 - mae: 83.632 - mean_q: -110.784 Interval 9689 (4844000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4324 6 episodes - episode_reward: -191.771 [-298.121, -160.205] - loss: 12.044 - mae: 83.654 - mean_q: -110.802 Interval 9690 (4844500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2976 6 episodes - episode_reward: -190.307 [-345.987, 2.072] - loss: 13.487 - mae: 83.673 - mean_q: -110.813 Interval 9691 (4845000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.7355 8 episodes - episode_reward: -357.976 [-1168.808, -157.881] - loss: 6.608 - mae: 83.665 - mean_q: -110.837 Interval 9692 (4845500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7384 9 episodes - episode_reward: -162.404 [-201.265, -100.000] - loss: 9.604 - mae: 83.710 - mean_q: -110.899 Interval 9693 (4846000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5533 6 episodes - episode_reward: -212.175 [-286.138, -155.263] - loss: 12.223 - mae: 83.744 - mean_q: -110.936 Interval 9694 (4846500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7747 8 episodes - episode_reward: -171.820 [-239.586, -51.624] - loss: 11.091 - mae: 83.767 - mean_q: -110.950 Interval 9695 (4847000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8694 9 episodes - episode_reward: -154.110 [-199.064, -91.945] - loss: 8.950 - mae: 83.778 - mean_q: -110.983 Interval 9696 (4847500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9585 9 episodes - episode_reward: -170.274 [-236.248, -111.059] - loss: 11.780 - mae: 83.828 - mean_q: -111.019 Interval 9697 (4848000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6258 7 episodes - episode_reward: -189.010 [-282.480, -110.619] - loss: 14.427 - mae: 83.848 - mean_q: -111.027 Interval 9698 (4848500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1839 8 episodes - episode_reward: -197.276 [-280.276, -132.148] - loss: 11.518 - mae: 83.857 - mean_q: -111.052 Interval 9699 (4849000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6229 7 episodes - episode_reward: -181.973 [-256.407, -131.942] - loss: 11.866 - mae: 83.880 - mean_q: -111.060 Interval 9700 (4849500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4883 8 episodes - episode_reward: -160.865 [-197.465, -100.000] - loss: 9.769 - mae: 83.883 - mean_q: -111.070 Interval 9701 (4850000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3263 6 episodes - episode_reward: -105.311 [-241.097, 61.619] - loss: 9.433 - mae: 83.922 - mean_q: -111.114 Interval 9702 (4850500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.4088 9 episodes - episode_reward: -193.036 [-329.971, -100.000] - loss: 10.833 - mae: 83.969 - mean_q: -111.151 Interval 9703 (4851000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2873 7 episodes - episode_reward: -163.805 [-255.975, -9.868] - loss: 14.001 - mae: 84.014 - mean_q: -111.176 Interval 9704 (4851500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1911 8 episodes - episode_reward: -198.178 [-303.625, -100.000] - loss: 11.543 - mae: 84.044 - mean_q: -111.183 Interval 9705 (4852000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4589 8 episodes - episode_reward: -149.848 [-209.559, -52.771] - loss: 10.690 - mae: 84.063 - mean_q: -111.210 Interval 9706 (4852500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1522 9 episodes - episode_reward: -179.376 [-246.846, -100.000] - loss: 10.148 - mae: 84.086 - mean_q: -111.226 Interval 9707 (4853000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3895 8 episodes - episode_reward: -212.318 [-308.373, -161.905] - loss: 11.348 - mae: 84.112 - mean_q: -111.249 Interval 9708 (4853500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1428 8 episodes - episode_reward: -196.998 [-311.550, -100.000] - loss: 9.116 - mae: 84.127 - mean_q: -111.273 Interval 9709 (4854000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0399 8 episodes - episode_reward: -190.760 [-226.887, -124.924] - loss: 8.584 - mae: 84.152 - mean_q: -111.326 Interval 9710 (4854500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4983 6 episodes - episode_reward: -197.658 [-346.629, -146.442] - loss: 8.664 - mae: 84.195 - mean_q: -111.376 Interval 9711 (4855000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6723 8 episodes - episode_reward: -173.708 [-209.711, -100.000] - loss: 9.769 - mae: 84.238 - mean_q: -111.426 Interval 9712 (4855500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4998 9 episodes - episode_reward: -175.514 [-235.342, -100.000] - loss: 10.660 - mae: 84.272 - mean_q: -111.435 Interval 9713 (4856000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8100 8 episodes - episode_reward: -186.662 [-291.739, -122.007] - loss: 9.923 - mae: 84.293 - mean_q: -111.465 Interval 9714 (4856500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9361 8 episodes - episode_reward: -185.861 [-249.914, -139.485] - loss: 8.572 - mae: 84.304 - mean_q: -111.500 Interval 9715 (4857000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1546 9 episodes - episode_reward: -179.554 [-231.245, -130.897] - loss: 10.779 - mae: 84.344 - mean_q: -111.543 Interval 9716 (4857500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3905 7 episodes - episode_reward: -173.748 [-278.135, -105.905] - loss: 11.378 - mae: 84.388 - mean_q: -111.580 Interval 9717 (4858000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7479 8 episodes - episode_reward: -171.674 [-229.627, -100.144] - loss: 11.770 - mae: 84.419 - mean_q: -111.590 Interval 9718 (4858500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5256 8 episodes - episode_reward: -158.068 [-197.828, -123.260] - loss: 11.155 - mae: 84.424 - mean_q: -111.603 Interval 9719 (4859000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2843 9 episodes - episode_reward: -177.531 [-247.063, -136.898] - loss: 10.199 - mae: 84.431 - mean_q: -111.638 Interval 9720 (4859500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2643 8 episodes - episode_reward: -193.488 [-265.476, -123.721] - loss: 11.691 - mae: 84.463 - mean_q: -111.645 Interval 9721 (4860000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5225 9 episodes - episode_reward: -150.791 [-247.223, -15.140] - loss: 6.404 - mae: 84.458 - mean_q: -111.675 Interval 9722 (4860500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0795 8 episodes - episode_reward: -194.886 [-324.212, -100.000] - loss: 12.411 - mae: 84.509 - mean_q: -111.699 Interval 9723 (4861000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1384 7 episodes - episode_reward: -210.443 [-378.512, -115.187] - loss: 10.000 - mae: 84.516 - mean_q: -111.758 Interval 9724 (4861500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7452 7 episodes - episode_reward: -202.462 [-303.235, -113.583] - loss: 12.657 - mae: 84.551 - mean_q: -111.762 Interval 9725 (4862000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5963 7 episodes - episode_reward: -190.387 [-236.811, -151.094] - loss: 11.491 - mae: 84.580 - mean_q: -111.779 Interval 9726 (4862500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0214 7 episodes - episode_reward: -140.417 [-286.004, 34.691] - loss: 10.021 - mae: 84.578 - mean_q: -111.798 Interval 9727 (4863000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5401 11 episodes - episode_reward: -160.980 [-255.531, -31.153] - loss: 10.388 - mae: 84.570 - mean_q: -111.805 Interval 9728 (4863500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.4329 8 episodes - episode_reward: -215.562 [-287.671, -168.636] - loss: 11.248 - mae: 84.552 - mean_q: -111.813 Interval 9729 (4864000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6210 8 episodes - episode_reward: -164.446 [-222.447, -100.000] - loss: 13.502 - mae: 84.545 - mean_q: -111.869 Interval 9730 (4864500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3097 6 episodes - episode_reward: -195.861 [-278.121, -106.459] - loss: 10.900 - mae: 84.530 - mean_q: -111.885 Interval 9731 (4865000 steps performed) 500/500 [==============================] - ETA: 0s - reward: -5.04 - 2s 4ms/step - reward: -4.9063 6 episodes - episode_reward: -417.697 [-1056.159, -100.000] - loss: 14.169 - mae: 84.531 - mean_q: -111.853 Interval 9732 (4865500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6669 4 episodes - episode_reward: -515.204 [-961.865, -85.203] - loss: 10.047 - mae: 84.516 - mean_q: -111.825 Interval 9733 (4866000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.9748 4 episodes - episode_reward: -680.587 [-1120.747, -389.067] - loss: 8.217 - mae: 84.528 - mean_q: -111.849 Interval 9734 (4866500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3677 5 episodes - episode_reward: -337.147 [-676.399, -189.781] - loss: 9.025 - mae: 84.572 - mean_q: -111.924 Interval 9735 (4867000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3751 7 episodes - episode_reward: -164.888 [-274.585, 14.474] - loss: 11.226 - mae: 84.642 - mean_q: -111.924 Interval 9736 (4867500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2710 7 episodes - episode_reward: -160.218 [-201.014, -114.410] - loss: 10.537 - mae: 84.675 - mean_q: -111.945 Interval 9737 (4868000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3175 8 episodes - episode_reward: -149.134 [-247.404, -46.122] - loss: 13.139 - mae: 84.718 - mean_q: -111.938 Interval 9738 (4868500 steps performed) 
500/500 [==============================] - 2s 4ms/step - reward: -2.8480 8 episodes - episode_reward: -182.505 [-249.934, -100.000] - loss: 13.510 - mae: 84.761 - mean_q: -111.938 Interval 9739 (4869000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7715 7 episodes - episode_reward: -182.178 [-351.564, -119.821] - loss: 10.806 - mae: 84.781 - mean_q: -111.942 Interval 9740 (4869500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0706 8 episodes - episode_reward: -201.662 [-356.807, -116.100] - loss: 10.632 - mae: 84.795 - mean_q: -111.955 Interval 9741 (4870000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5665 7 episodes - episode_reward: -172.284 [-279.340, -109.262] - loss: 11.463 - mae: 84.818 - mean_q: -111.977 Interval 9742 (4870500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5424 6 episodes - episode_reward: -209.394 [-383.777, -140.732] - loss: 11.245 - mae: 84.845 - mean_q: -111.983 Interval 9743 (4871000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.3983 8 episodes - episode_reward: -226.120 [-382.896, -153.370] - loss: 16.796 - mae: 84.874 - mean_q: -111.957 Interval 9744 (4871500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4772 7 episodes - episode_reward: -247.781 [-422.073, -142.002] - loss: 11.237 - mae: 84.873 - mean_q: -111.935 Interval 9745 (4872000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9849 9 episodes - episode_reward: -167.097 [-220.397, -112.087] - loss: 12.580 - mae: 84.892 - mean_q: -111.955 Interval 9746 (4872500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6623 8 episodes - episode_reward: -164.215 [-190.656, -143.610] - loss: 13.490 - mae: 84.914 - mean_q: -111.981 Interval 9747 (4873000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.0564 7 episodes - episode_reward: -213.112 [-382.799, -100.000] - loss: 11.609 - mae: 84.906 - mean_q: -111.995 Interval 9748 (4873500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5759 8 episodes - episode_reward: -167.198 [-278.988, -70.276] - loss: 12.213 - mae: 84.927 - mean_q: -111.994 Interval 9749 (4874000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0531 6 episodes - episode_reward: -165.175 [-226.188, -134.699] - loss: 11.685 - mae: 84.913 - mean_q: -112.013 Interval 9750 (4874500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7438 7 episodes - episode_reward: -197.850 [-264.265, -143.128] - loss: 14.522 - mae: 84.948 - mean_q: -112.031 Interval 9751 (4875000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4249 6 episodes - episode_reward: -199.048 [-306.662, -155.043] - loss: 12.915 - mae: 84.955 - mean_q: -112.016 Interval 9752 (4875500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9606 7 episodes - episode_reward: -204.870 [-267.043, -155.427] - loss: 10.255 - mae: 84.963 - mean_q: -112.023 Interval 9753 (4876000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7396 8 episodes - episode_reward: -179.174 [-306.452, -42.404] - loss: 10.259 - mae: 84.968 - mean_q: -112.039 Interval 9754 (4876500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7436 9 episodes - episode_reward: -149.590 [-286.785, 22.620] - loss: 13.337 - mae: 85.000 - mean_q: -112.055 Interval 9755 (4877000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8929 8 episodes - episode_reward: -177.782 [-217.418, -146.381] - loss: 12.599 - mae: 85.012 - mean_q: -112.056 Interval 9756 (4877500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -3.5302 10 episodes - episode_reward: -185.248 [-315.533, -100.000] - loss: 13.492 - mae: 85.021 - mean_q: -112.061 Interval 9757 (4878000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5290 7 episodes - episode_reward: -171.036 [-235.535, -114.068] - loss: 10.266 - mae: 85.013 - mean_q: -112.048 Interval 9758 (4878500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0601 8 episodes - episode_reward: -181.070 [-248.878, -130.831] - loss: 12.083 - mae: 85.016 - mean_q: -112.054 Interval 9759 (4879000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3707 8 episodes - episode_reward: -159.793 [-267.685, -78.112] - loss: 15.396 - mae: 85.036 - mean_q: -112.040 Interval 9760 (4879500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3019 7 episodes - episode_reward: -161.580 [-221.172, -110.913] - loss: 12.366 - mae: 85.028 - mean_q: -112.014 Interval 9761 (4880000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6231 8 episodes - episode_reward: -171.412 [-244.278, -91.648] - loss: 12.668 - mae: 85.029 - mean_q: -112.015 Interval 9762 (4880500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3839 7 episodes - episode_reward: -162.484 [-242.080, -21.708] - loss: 12.816 - mae: 85.037 - mean_q: -111.998 Interval 9763 (4881000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7376 8 episodes - episode_reward: -165.916 [-230.935, -73.624] - loss: 14.750 - mae: 85.045 - mean_q: -111.998 Interval 9764 (4881500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0075 8 episodes - episode_reward: -191.967 [-243.580, -124.406] - loss: 9.151 - mae: 85.036 - mean_q: -111.996 Interval 9765 (4882000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8908 7 episodes - episode_reward: -210.525 [-317.370, -165.343] - loss: 10.644 - mae: 85.056 - mean_q: -112.005 Interval 9766 (4882500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8650 8 episodes - episode_reward: -166.415 [-200.853, -96.340] - loss: 14.293 - mae: 85.072 - mean_q: -112.018 Interval 9767 (4883000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2687 8 episodes - episode_reward: -221.277 [-252.753, -175.842] - loss: 11.454 - mae: 85.062 - mean_q: -112.005 Interval 9768 (4883500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5657 6 episodes - episode_reward: -209.458 [-311.850, -140.966] - loss: 14.554 - mae: 85.072 - mean_q: -111.969 Interval 9769 (4884000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5646 9 episodes - episode_reward: -156.557 [-212.635, -100.000] - loss: 14.659 - mae: 85.054 - mean_q: -111.947 Interval 9770 (4884500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9915 7 episodes - episode_reward: -126.827 [-348.640, 1.476] - loss: 11.409 - mae: 85.022 - mean_q: -111.948 Interval 9771 (4885000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9233 8 episodes - episode_reward: -174.562 [-233.443, -100.000] - loss: 13.957 - mae: 85.031 - mean_q: -111.945 Interval 9772 (4885500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0679 9 episodes - episode_reward: -166.602 [-370.226, 61.040] - loss: 14.320 - mae: 85.008 - mean_q: -111.922 Interval 9773 (4886000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0310 8 episodes - episode_reward: -194.742 [-297.960, -123.434] - loss: 14.390 - mae: 84.992 - mean_q: -111.898 Interval 9774 (4886500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.9466 10 episodes - episode_reward: -205.950 [-333.295, -100.000] - loss: 12.102 - mae: 84.970 - mean_q: -111.907 Interval 9775 (4887000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9679 8 episodes - episode_reward: -180.483 [-216.354, -121.677] - loss: 14.219 - mae: 84.966 - mean_q: -111.900 Interval 9776 (4887500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0315 7 episodes - episode_reward: -216.066 [-445.488, -132.547] - loss: 12.824 - mae: 84.956 - mean_q: -111.876 Interval 9777 (4888000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4747 6 episodes - episode_reward: -204.437 [-257.394, -177.958] - loss: 12.025 - mae: 84.935 - mean_q: -111.859 Interval 9778 (4888500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9459 7 episodes - episode_reward: -211.292 [-297.232, -152.507] - loss: 12.185 - mae: 84.945 - mean_q: -111.859 Interval 9779 (4889000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9814 7 episodes - episode_reward: -204.180 [-253.283, -169.988] - loss: 11.251 - mae: 84.953 - mean_q: -111.866 Interval 9780 (4889500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7560 9 episodes - episode_reward: -163.159 [-218.833, -41.356] - loss: 12.384 - mae: 84.932 - mean_q: -111.850 Interval 9781 (4890000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.7417 9 episodes - episode_reward: -205.801 [-274.479, -135.751] - loss: 14.017 - mae: 84.925 - mean_q: -111.820 Interval 9782 (4890500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5008 8 episodes - episode_reward: -213.294 [-378.673, -126.091] - loss: 14.410 - mae: 84.923 - mean_q: -111.799 Interval 9783 (4891000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9615 8 episodes - episode_reward: -195.521 [-253.160, -138.141] - loss: 11.736 - mae: 84.887 - mean_q: -111.790 Interval 9784 (4891500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8799 6 episodes - episode_reward: -218.067 [-332.077, -139.027] - loss: 11.253 - mae: 84.891 - mean_q: -111.790 Interval 9785 (4892000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.3397 6 episodes - episode_reward: -129.919 [-202.549, 27.524] - loss: 14.170 - mae: 84.912 - mean_q: -111.797 Interval 9786 (4892500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4209 11 episodes - episode_reward: -153.403 [-250.713, 16.992] - loss: 11.908 - mae: 84.882 - mean_q: -111.785 Interval 9787 (4893000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9671 8 episodes - episode_reward: -182.705 [-231.037, -146.276] - loss: 14.833 - mae: 84.882 - mean_q: -111.786 Interval 9788 (4893500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1805 9 episodes - episode_reward: -175.161 [-251.662, -143.019] - loss: 11.515 - mae: 84.866 - mean_q: -111.803 Interval 9789 (4894000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4063 9 episodes - episode_reward: -186.421 [-289.611, -109.438] - loss: 13.014 - mae: 84.866 - mean_q: -111.808 Interval 9790 (4894500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1384 8 episodes - episode_reward: -194.762 [-207.382, -181.395] - loss: 12.601 - mae: 84.848 - mean_q: -111.808 Interval 9791 (4895000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8493 8 episodes - episode_reward: -192.915 [-272.515, -100.000] - loss: 12.880 - mae: 84.829 - mean_q: -111.800 Interval 9792 (4895500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1107 8 episodes - episode_reward: -192.279 [-257.741, -77.732] - loss: 16.234 - mae: 84.824 - mean_q: -111.794 Interval 9793 (4896000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7392 7 episodes - episode_reward: -188.060 [-262.249, -146.154] - loss: 14.544 - mae: 84.800 - mean_q: -111.797 Interval 9794 (4896500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8419 8 episodes - episode_reward: -185.914 [-313.233, -121.724] - loss: 12.294 - mae: 84.774 - mean_q: -111.782 Interval 9795 (4897000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6043 6 episodes - episode_reward: -199.662 [-371.198, -128.861] - loss: 13.918 - mae: 84.782 - mean_q: -111.780 Interval 9796 (4897500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5194 7 episodes - episode_reward: -170.203 [-237.720, -12.546] - loss: 14.630 - mae: 84.769 - mean_q: -111.768 Interval 9797 (4898000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4987 10 episodes - episode_reward: -184.844 [-276.158, -111.461] - loss: 15.949 - mae: 84.765 - mean_q: -111.747 Interval 9798 (4898500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4663 7 episodes - episode_reward: -166.616 [-206.692, -76.001] - loss: 13.952 - mae: 84.723 - mean_q: -111.717 Interval 9799 (4899000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1200 6 episodes - episode_reward: -188.391 [-220.622, -114.662] - loss: 11.060 - mae: 84.711 - mean_q: -111.728 Interval 9800 (4899500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0014 9 episodes - episode_reward: -170.603 [-293.469, -100.000] - loss: 15.178 - mae: 84.715 - mean_q: -111.732 Interval 9801 (4900000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2968 8 episodes - episode_reward: -199.239 [-276.789, -134.138] - loss: 13.192 - mae: 84.681 - mean_q: -111.731 Interval 9802 (4900500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8993 8 episodes - episode_reward: -186.264 [-224.662, -139.972] - loss: 18.641 - mae: 84.690 - mean_q: -111.738 Interval 9803 (4901000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7663 11 episodes - episode_reward: -174.892 [-221.381, -124.736] - loss: 11.627 - mae: 84.653 - mean_q: -111.725 Interval 9804 (4901500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2654 7 episodes - episode_reward: -151.985 [-177.660, -100.000] - loss: 11.915 - mae: 84.633 - mean_q: -111.756 Interval 9805 (4902000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9130 8 episodes - episode_reward: -186.732 [-248.147, -145.538] - loss: 11.033 - mae: 84.640 - mean_q: -111.776 Interval 9806 (4902500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2292 8 episodes - episode_reward: -204.359 [-307.192, -152.872] - loss: 10.955 - mae: 84.653 - mean_q: -111.775 Interval 9807 (4903000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7337 8 episodes - episode_reward: -170.619 [-240.567, -80.100] - loss: 12.081 - mae: 84.646 - mean_q: -111.792 Interval 9808 (4903500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9991 9 episodes - episode_reward: -159.441 [-205.899, -100.000] - loss: 9.283 - mae: 84.637 - mean_q: -111.855 Interval 9809 (4904000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2919 9 episodes - episode_reward: -183.883 [-273.859, -84.644] - loss: 13.989 - mae: 84.677 - mean_q: -111.860 Interval 9810 (4904500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9025 9 episodes - episode_reward: -163.256 [-212.318, -114.231] - loss: 13.460 - mae: 84.666 - mean_q: -111.845 Interval 9811 (4905000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8343 7 episodes - episode_reward: -192.972 [-327.412, -113.590] - loss: 14.232 - mae: 84.666 - mean_q: -111.832 Interval 9812 (4905500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1077 9 episodes - episode_reward: -177.187 [-260.919, -102.176] - loss: 12.391 - mae: 84.676 - mean_q: -111.826 Interval 9813 (4906000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.7718 6 episodes - episode_reward: -146.026 [-206.000, -42.614] - loss: 14.099 - mae: 84.667 - mean_q: -111.817 Interval 9814 (4906500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7250 8 episodes - episode_reward: -164.925 [-197.449, -116.940] - loss: 10.521 - mae: 84.640 - mean_q: -111.822 Interval 9815 (4907000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7719 8 episodes - episode_reward: -180.042 [-307.435, -27.529] - loss: 11.868 - mae: 84.644 - mean_q: -111.858 Interval 9816 (4907500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2963 8 episodes - episode_reward: -202.632 [-221.451, -175.576] - loss: 9.799 - mae: 84.636 - mean_q: -111.894 Interval 9817 (4908000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1649 7 episodes - episode_reward: -157.152 [-262.349, -10.702] - loss: 11.836 - mae: 84.667 - mean_q: -111.898 Interval 9818 (4908500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2718 10 episodes - episode_reward: -168.231 [-207.391, -100.000] - loss: 15.006 - mae: 84.707 - mean_q: -111.898 Interval 9819 (4909000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -1.9474 6 episodes - episode_reward: -159.431 [-273.412, -22.582] - loss: 14.489 - mae: 84.704 - mean_q: -111.863 Interval 9820 (4909500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7038 8 episodes - episode_reward: -167.969 [-253.052, -95.855] - loss: 12.560 - mae: 84.696 - mean_q: -111.867 Interval 9821 (4910000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0441 7 episodes - episode_reward: -206.571 [-383.636, -127.484] - loss: 12.423 - mae: 84.678 - mean_q: -111.891 Interval 9822 (4910500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4200 8 episodes - episode_reward: -175.447 [-223.203, -147.306] - loss: 9.310 - mae: 84.670 - mean_q: -111.924 Interval 9823 (4911000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.2431 13 episodes - episode_reward: -156.727 [-235.010, -11.403] - loss: 11.934 - mae: 84.688 - mean_q: -111.967 Interval 9824 (4911500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9050 9 episodes - episode_reward: -162.880 [-278.717, -105.397] - loss: 10.112 - mae: 84.682 - mean_q: -112.000 Interval 9825 (4912000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6767 7 episodes - episode_reward: -179.404 [-272.619, -101.662] - loss: 12.365 - mae: 84.708 - mean_q: -112.006 Interval 9826 (4912500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.7485 9 episodes - episode_reward: -206.046 [-540.036, -100.000] - loss: 16.558 - mae: 84.717 - mean_q: -111.984 Interval 9827 (4913000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3213 7 episodes - episode_reward: -169.640 [-280.406, -28.795] - loss: 11.527 - mae: 84.699 - mean_q: -111.993 Interval 9828 (4913500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2087 7 episodes - episode_reward: -238.916 [-282.593, -166.882] - loss: 14.440 - mae: 84.726 - mean_q: -112.002 Interval 9829 (4914000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8613 7 episodes - episode_reward: -205.505 [-317.951, -112.560] - loss: 13.883 - mae: 84.731 - mean_q: -112.001 Interval 9830 (4914500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4451 6 episodes - episode_reward: -190.400 [-257.718, -136.808] - loss: 13.718 - mae: 84.720 - mean_q: -111.996 Interval 9831 (4915000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5443 7 episodes - episode_reward: -181.654 [-226.095, -94.645] - loss: 13.027 - mae: 84.711 - mean_q: -112.011 Interval 9832 (4915500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3092 8 episodes - episode_reward: -151.221 [-278.227, -67.935] - loss: 13.563 - mae: 84.691 - mean_q: -112.044 Interval 9833 (4916000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9062 8 episodes - episode_reward: -180.543 [-247.279, -148.738] - loss: 11.013 - mae: 84.653 - mean_q: -112.063 Interval 9834 (4916500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8314 7 episodes - episode_reward: -192.397 [-244.842, -137.302] - loss: 13.755 - mae: 84.622 - mean_q: -112.074 Interval 9835 (4917000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2655 7 episodes - episode_reward: -168.121 [-269.143, -31.926] - loss: 11.022 - mae: 84.572 - mean_q: -112.107 Interval 9836 (4917500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6082 7 episodes - episode_reward: -187.275 [-263.804, -105.974] - loss: 15.779 - mae: 84.562 - mean_q: -112.111 Interval 9837 (4918000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.1305 8 episodes - episode_reward: -201.118 [-281.744, -118.658] - loss: 12.953 - mae: 84.514 - mean_q: -112.120 Interval 9838 (4918500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1672 8 episodes - episode_reward: -195.227 [-309.256, -100.000] - loss: 18.856 - mae: 84.526 - mean_q: -112.116 Interval 9839 (4919000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1987 7 episodes - episode_reward: -178.747 [-484.148, -93.350] - loss: 13.469 - mae: 84.488 - mean_q: -112.061 Interval 9840 (4919500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -6.3339 6 episodes - episode_reward: -585.881 [-901.649, -139.270] - loss: 13.612 - mae: 84.479 - mean_q: -112.003 Interval 9841 (4920000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.4400 4 episodes - episode_reward: -526.503 [-676.916, -416.414] - loss: 10.812 - mae: 84.477 - mean_q: -112.026 Interval 9842 (4920500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9171 8 episodes - episode_reward: -200.681 [-291.664, -161.484] - loss: 10.714 - mae: 84.503 - mean_q: -112.088 Interval 9843 (4921000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1611 7 episodes - episode_reward: -157.239 [-220.266, -34.524] - loss: 11.780 - mae: 84.530 - mean_q: -112.108 Interval 9844 (4921500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4358 7 episodes - episode_reward: -168.813 [-231.182, -132.250] - loss: 13.873 - mae: 84.564 - mean_q: -112.116 Interval 9845 (4922000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7973 8 episodes - episode_reward: -171.834 [-272.894, -130.426] - loss: 16.009 - mae: 84.591 - mean_q: -112.113 Interval 9846 (4922500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.9095 8 episodes - episode_reward: -186.020 [-270.515, -111.902] - loss: 8.184 - mae: 84.568 - mean_q: -112.125 Interval 9847 (4923000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2726 7 episodes - episode_reward: -372.254 [-983.409, -136.448] - loss: 11.695 - mae: 84.623 - mean_q: -112.170 Interval 9848 (4923500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8379 7 episodes - episode_reward: -204.010 [-295.122, -132.937] - loss: 11.124 - mae: 84.652 - mean_q: -112.195 Interval 9849 (4924000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0959 6 episodes - episode_reward: -157.203 [-224.801, -86.012] - loss: 11.946 - mae: 84.687 - mean_q: -112.209 Interval 9850 (4924500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6376 7 episodes - episode_reward: -192.945 [-316.967, -75.114] - loss: 11.430 - mae: 84.709 - mean_q: -112.230 Interval 9851 (4925000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9121 9 episodes - episode_reward: -174.185 [-215.098, -135.584] - loss: 11.924 - mae: 84.733 - mean_q: -112.226 Interval 9852 (4925500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6650 8 episodes - episode_reward: -162.429 [-254.772, -24.228] - loss: 13.254 - mae: 84.767 - mean_q: -112.216 Interval 9853 (4926000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4974 7 episodes - episode_reward: -182.158 [-213.469, -140.993] - loss: 13.271 - mae: 84.791 - mean_q: -112.216 Interval 9854 (4926500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.8401 7 episodes - episode_reward: -197.652 [-305.230, -116.624] - loss: 14.440 - mae: 84.824 - mean_q: -112.199 Interval 9855 (4927000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.2472 7 episodes - episode_reward: -156.893 [-192.390, -113.663] - loss: 9.686 - mae: 84.831 - mean_q: -112.201 Interval 9856 (4927500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5088 8 episodes - episode_reward: -164.378 [-256.617, -66.353] - loss: 13.609 - mae: 84.873 - mean_q: -112.210 Interval 9857 (4928000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.3927 6 episodes - episode_reward: -172.486 [-233.189, -139.058] - loss: 13.854 - mae: 84.891 - mean_q: -112.196 Interval 9858 (4928500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3585 9 episodes - episode_reward: -202.398 [-319.923, -133.771] - loss: 9.679 - mae: 84.884 - mean_q: -112.213 Interval 9859 (4929000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1220 8 episodes - episode_reward: -189.397 [-256.704, -96.933] - loss: 10.108 - mae: 84.890 - mean_q: -112.250 Interval 9860 (4929500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -6.8126 9 episodes - episode_reward: -378.665 [-684.699, -221.646] - loss: 11.450 - mae: 84.912 - mean_q: -112.295 Interval 9861 (4930000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5245 9 episodes - episode_reward: -201.691 [-326.965, -137.354] - loss: 12.279 - mae: 84.971 - mean_q: -112.329 Interval 9862 (4930500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9342 8 episodes - episode_reward: -184.433 [-262.159, -137.895] - loss: 12.333 - mae: 85.018 - mean_q: -112.337 Interval 9863 (4931000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2935 8 episodes - episode_reward: -205.867 [-289.320, -145.660] - loss: 11.107 - mae: 85.032 - mean_q: -112.344 Interval 9864 (4931500 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.8526 8 episodes - episode_reward: -173.293 [-318.545, -105.765] - loss: 12.666 - mae: 85.066 - mean_q: -112.348 Interval 9865 (4932000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.5714 10 episodes - episode_reward: -182.904 [-244.341, -100.000] - loss: 14.393 - mae: 85.113 - mean_q: -112.345 Interval 9866 (4932500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5849 7 episodes - episode_reward: -186.962 [-216.838, -154.202] - loss: 10.366 - mae: 85.127 - mean_q: -112.345 Interval 9867 (4933000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9275 8 episodes - episode_reward: -172.487 [-247.863, -100.000] - loss: 10.969 - mae: 85.151 - mean_q: -112.355 Interval 9868 (4933500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7067 8 episodes - episode_reward: -174.321 [-230.134, -53.420] - loss: 12.676 - mae: 85.173 - mean_q: -112.383 Interval 9869 (4934000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9873 6 episodes - episode_reward: -159.669 [-257.954, 17.594] - loss: 13.486 - mae: 85.194 - mean_q: -112.394 Interval 9870 (4934500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0440 7 episodes - episode_reward: -221.654 [-469.316, -144.840] - loss: 10.038 - mae: 85.197 - mean_q: -112.390 Interval 9871 (4935000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5005 9 episodes - episode_reward: -198.091 [-275.063, -123.902] - loss: 12.563 - mae: 85.226 - mean_q: -112.404 Interval 9872 (4935500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5862 5 episodes - episode_reward: -226.628 [-288.037, -177.468] - loss: 11.415 - mae: 85.248 - mean_q: -112.413 Interval 9873 (4936000 steps performed) 500/500 
[==============================] - 2s 5ms/step - reward: -2.6804 9 episodes - episode_reward: -169.037 [-305.342, -8.192] - loss: 9.415 - mae: 85.248 - mean_q: -112.441 Interval 9874 (4936500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7890 8 episodes - episode_reward: -154.433 [-197.556, -26.608] - loss: 12.091 - mae: 85.280 - mean_q: -112.460 Interval 9875 (4937000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6899 7 episodes - episode_reward: -198.021 [-272.573, -139.272] - loss: 9.975 - mae: 85.303 - mean_q: -112.497 Interval 9876 (4937500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6460 8 episodes - episode_reward: -175.268 [-242.491, -54.519] - loss: 10.042 - mae: 85.324 - mean_q: -112.537 Interval 9877 (4938000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8141 6 episodes - episode_reward: -148.461 [-232.234, 31.826] - loss: 17.708 - mae: 85.381 - mean_q: -112.553 Interval 9878 (4938500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5555 9 episodes - episode_reward: -142.190 [-197.966, 14.610] - loss: 11.909 - mae: 85.375 - mean_q: -112.542 Interval 9879 (4939000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2480 8 episodes - episode_reward: -138.849 [-220.179, -36.683] - loss: 13.500 - mae: 85.426 - mean_q: -112.539 Interval 9880 (4939500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5374 7 episodes - episode_reward: -177.785 [-241.264, -116.523] - loss: 15.222 - mae: 85.434 - mean_q: -112.523 Interval 9881 (4940000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6321 8 episodes - episode_reward: -170.838 [-251.997, -79.430] - loss: 11.336 - mae: 85.418 - mean_q: -112.498 Interval 9882 (4940500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.2147 9 episodes - episode_reward: -176.373 [-215.793, -141.062] - loss: 8.808 - mae: 85.412 - mean_q: -112.512 Interval 9883 (4941000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.0645 6 episodes - episode_reward: -171.017 [-240.922, 2.417] - loss: 13.190 - mae: 85.442 - mean_q: -112.525 Interval 9884 (4941500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1220 8 episodes - episode_reward: -188.337 [-256.715, -110.253] - loss: 15.246 - mae: 85.445 - mean_q: -112.509 Interval 9885 (4942000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.1321 7 episodes - episode_reward: -231.751 [-309.176, -155.693] - loss: 9.818 - mae: 85.418 - mean_q: -112.498 Interval 9886 (4942500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2307 9 episodes - episode_reward: -176.388 [-253.210, -100.000] - loss: 16.853 - mae: 85.449 - mean_q: -112.492 Interval 9887 (4943000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9725 8 episodes - episode_reward: -195.700 [-236.355, -162.580] - loss: 12.612 - mae: 85.435 - mean_q: -112.469 Interval 9888 (4943500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7044 7 episodes - episode_reward: -183.440 [-255.683, -154.648] - loss: 13.895 - mae: 85.444 - mean_q: -112.437 Interval 9889 (4944000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2855 7 episodes - episode_reward: -156.257 [-235.209, -44.080] - loss: 11.069 - mae: 85.434 - mean_q: -112.418 Interval 9890 (4944500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6505 7 episodes - episode_reward: -195.806 [-283.735, -134.424] - loss: 10.655 - mae: 85.433 - mean_q: -112.425 Interval 9891 (4945000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7074 8 episodes - episode_reward: -170.675 [-230.195, -37.399] - loss: 11.686 - mae: 85.430 - mean_q: -112.408 Interval 9892 (4945500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6065 9 episodes - episode_reward: -148.656 [-266.388, -91.232] - loss: 11.978 - mae: 85.415 - mean_q: -112.400 Interval 9893 (4946000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7043 8 episodes - episode_reward: -170.815 [-211.222, -117.296] - loss: 19.073 - mae: 85.445 - mean_q: -112.367 Interval 9894 (4946500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5507 6 episodes - episode_reward: -206.768 [-341.640, -32.290] - loss: 12.477 - mae: 85.410 - mean_q: -112.328 Interval 9895 (4947000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5524 8 episodes - episode_reward: -209.714 [-308.317, -104.015] - loss: 14.223 - mae: 85.425 - mean_q: -112.308 Interval 9896 (4947500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.8641 11 episodes - episode_reward: -185.682 [-247.780, -121.076] - loss: 16.074 - mae: 85.396 - mean_q: -112.293 Interval 9897 (4948000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6787 9 episodes - episode_reward: -147.402 [-198.510, -43.831] - loss: 11.820 - mae: 85.379 - mean_q: -112.308 Interval 9898 (4948500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9008 9 episodes - episode_reward: -163.213 [-228.620, -140.633] - loss: 12.007 - mae: 85.363 - mean_q: -112.319 Interval 9899 (4949000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9271 8 episodes - episode_reward: -184.937 [-230.197, -119.708] - loss: 13.031 - mae: 85.383 - mean_q: -112.328 Interval 9900 (4949500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -3.7572 10 episodes - episode_reward: -182.824 [-263.353, -108.036] - loss: 12.047 - mae: 85.364 - mean_q: -112.322 Interval 9901 (4950000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0917 9 episodes - episode_reward: -176.606 [-318.029, -11.866] - loss: 12.872 - mae: 85.367 - mean_q: -112.330 Interval 9902 (4950500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2627 8 episodes - episode_reward: -198.200 [-276.237, -77.143] - loss: 17.582 - mae: 85.383 - mean_q: -112.312 Interval 9903 (4951000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0176 8 episodes - episode_reward: -195.211 [-249.057, -127.171] - loss: 16.268 - mae: 85.369 - mean_q: -112.286 Interval 9904 (4951500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2994 9 episodes - episode_reward: -174.011 [-240.591, -107.785] - loss: 15.705 - mae: 85.353 - mean_q: -112.251 Interval 9905 (4952000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0774 9 episodes - episode_reward: -172.904 [-246.093, -109.415] - loss: 16.211 - mae: 85.342 - mean_q: -112.236 Interval 9906 (4952500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0849 8 episodes - episode_reward: -194.509 [-254.986, -100.000] - loss: 15.116 - mae: 85.335 - mean_q: -112.222 Interval 9907 (4953000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4609 8 episodes - episode_reward: -222.620 [-401.857, -135.085] - loss: 14.003 - mae: 85.319 - mean_q: -112.210 Interval 9908 (4953500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3307 9 episodes - episode_reward: -168.933 [-261.154, -113.496] - loss: 13.452 - mae: 85.304 - mean_q: -112.212 Interval 9909 (4954000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.7054 8 episodes - episode_reward: -190.482 [-295.413, -134.570] - loss: 13.179 - mae: 85.303 - mean_q: -112.213 Interval 9910 (4954500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8352 8 episodes - episode_reward: -174.390 [-225.736, -100.000] - loss: 12.859 - mae: 85.293 - mean_q: -112.239 Interval 9911 (4955000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9753 6 episodes - episode_reward: -158.329 [-209.876, -67.509] - loss: 12.064 - mae: 85.283 - mean_q: -112.248 Interval 9912 (4955500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4691 8 episodes - episode_reward: -152.383 [-213.583, 18.382] - loss: 16.043 - mae: 85.285 - mean_q: -112.238 Interval 9913 (4956000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8580 8 episodes - episode_reward: -185.151 [-280.761, -71.570] - loss: 12.746 - mae: 85.276 - mean_q: -112.247 Interval 9914 (4956500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3870 7 episodes - episode_reward: -161.960 [-220.916, -108.961] - loss: 12.215 - mae: 85.275 - mean_q: -112.235 Interval 9915 (4957000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.4132 10 episodes - episode_reward: -177.117 [-232.368, -106.238] - loss: 14.212 - mae: 85.267 - mean_q: -112.246 Interval 9916 (4957500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4971 6 episodes - episode_reward: -204.309 [-288.589, -98.717] - loss: 15.332 - mae: 85.262 - mean_q: -112.233 Interval 9917 (4958000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9604 6 episodes - episode_reward: -166.342 [-266.831, -8.428] - loss: 13.437 - mae: 85.250 - mean_q: -112.228 Interval 9918 (4958500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.7763 7 episodes - episode_reward: -197.280 [-380.633, -129.333] - loss: 12.441 - mae: 85.232 - mean_q: -112.234 Interval 9919 (4959000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5120 7 episodes - episode_reward: -174.450 [-220.266, -130.169] - loss: 12.075 - mae: 85.243 - mean_q: -112.239 Interval 9920 (4959500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3116 8 episodes - episode_reward: -208.357 [-298.511, -100.000] - loss: 11.147 - mae: 85.229 - mean_q: -112.259 Interval 9921 (4960000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4417 8 episodes - episode_reward: -154.229 [-229.779, -25.964] - loss: 12.382 - mae: 85.219 - mean_q: -112.284 Interval 9922 (4960500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9337 8 episodes - episode_reward: -181.883 [-367.616, -33.735] - loss: 12.368 - mae: 85.224 - mean_q: -112.289 Interval 9923 (4961000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6542 7 episodes - episode_reward: -194.198 [-322.529, -88.175] - loss: 12.262 - mae: 85.240 - mean_q: -112.305 Interval 9924 (4961500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0623 8 episodes - episode_reward: -186.298 [-243.104, -145.129] - loss: 12.660 - mae: 85.254 - mean_q: -112.286 Interval 9925 (4962000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2341 7 episodes - episode_reward: -160.295 [-217.416, -83.507] - loss: 11.497 - mae: 85.245 - mean_q: -112.288 Interval 9926 (4962500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.5514 10 episodes - episode_reward: -177.205 [-228.818, -100.000] - loss: 11.458 - mae: 85.244 - mean_q: -112.299 Interval 9927 (4963000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.1161 7 episodes - episode_reward: -146.831 [-273.109, 24.280] - loss: 13.658 - mae: 85.236 - mean_q: -112.286 Interval 9928 (4963500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9412 7 episodes - episode_reward: -215.564 [-325.574, -139.498] - loss: 13.715 - mae: 85.234 - mean_q: -112.266 Interval 9929 (4964000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3639 7 episodes - episode_reward: -171.521 [-320.263, -32.037] - loss: 13.162 - mae: 85.214 - mean_q: -112.247 Interval 9930 (4964500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6397 7 episodes - episode_reward: -183.177 [-226.133, -145.553] - loss: 11.791 - mae: 85.203 - mean_q: -112.256 Interval 9931 (4965000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1902 9 episodes - episode_reward: -180.515 [-378.182, -29.575] - loss: 13.865 - mae: 85.226 - mean_q: -112.236 Interval 9932 (4965500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4664 9 episodes - episode_reward: -137.085 [-198.060, -3.864] - loss: 12.529 - mae: 85.210 - mean_q: -112.235 Interval 9933 (4966000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6066 6 episodes - episode_reward: -218.972 [-331.596, -87.560] - loss: 9.739 - mae: 85.190 - mean_q: -112.265 Interval 9934 (4966500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6971 7 episodes - episode_reward: -193.439 [-245.524, -150.549] - loss: 15.338 - mae: 85.206 - mean_q: -112.285 Interval 9935 (4967000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3685 6 episodes - episode_reward: -190.986 [-254.381, -113.402] - loss: 12.632 - mae: 85.186 - mean_q: -112.280 Interval 9936 (4967500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.7606 9 episodes - episode_reward: -157.447 [-199.697, -124.007] - loss: 11.699 - mae: 85.194 - mean_q: -112.282 Interval 9937 (4968000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6975 7 episodes - episode_reward: -188.701 [-309.395, -117.864] - loss: 14.070 - mae: 85.206 - mean_q: -112.279 Interval 9938 (4968500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6332 8 episodes - episode_reward: -164.612 [-223.407, -100.000] - loss: 14.290 - mae: 85.198 - mean_q: -112.272 Interval 9939 (4969000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8296 9 episodes - episode_reward: -160.731 [-216.650, -108.403] - loss: 14.034 - mae: 85.182 - mean_q: -112.264 Interval 9940 (4969500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1944 9 episodes - episode_reward: -177.593 [-261.431, -99.297] - loss: 15.003 - mae: 85.171 - mean_q: -112.243 Interval 9941 (4970000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3158 8 episodes - episode_reward: -145.174 [-206.973, 4.225] - loss: 12.103 - mae: 85.125 - mean_q: -112.237 Interval 9942 (4970500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9086 5 episodes - episode_reward: -179.357 [-195.199, -162.619] - loss: 18.208 - mae: 85.114 - mean_q: -112.209 Interval 9943 (4971000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.8628 6 episodes - episode_reward: -151.690 [-283.762, -79.348] - loss: 9.038 - mae: 85.049 - mean_q: -112.188 Interval 9944 (4971500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.1913 8 episodes - episode_reward: -142.682 [-301.381, -16.621] - loss: 13.730 - mae: 85.039 - mean_q: -112.187 Interval 9945 (4972000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.8903 8 episodes - episode_reward: -184.571 [-294.294, -135.767] - loss: 10.607 - mae: 85.015 - mean_q: -112.168 Interval 9946 (4972500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9491 8 episodes - episode_reward: -184.098 [-267.042, -94.498] - loss: 14.393 - mae: 84.977 - mean_q: -112.152 Interval 9947 (4973000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -4.6602 5 episodes - episode_reward: -463.913 [-630.674, -252.367] - loss: 12.882 - mae: 84.927 - mean_q: -112.084 Interval 9948 (4973500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -5.2086 6 episodes - episode_reward: -436.737 [-790.476, -144.020] - loss: 13.020 - mae: 84.883 - mean_q: -112.026 Interval 9949 (4974000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.1153 9 episodes - episode_reward: -212.856 [-439.623, -99.369] - loss: 12.804 - mae: 84.881 - mean_q: -112.028 Interval 9950 (4974500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.2939 7 episodes - episode_reward: -178.164 [-273.073, -63.573] - loss: 12.075 - mae: 84.882 - mean_q: -112.012 Interval 9951 (4975000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7551 9 episodes - episode_reward: -154.504 [-209.827, -100.000] - loss: 12.665 - mae: 84.878 - mean_q: -111.977 Interval 9952 (4975500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5612 6 episodes - episode_reward: -204.496 [-260.047, -159.896] - loss: 9.243 - mae: 84.858 - mean_q: -111.974 Interval 9953 (4976000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8615 8 episodes - episode_reward: -188.833 [-269.211, -144.001] - loss: 11.372 - mae: 84.852 - mean_q: -111.967 Interval 9954 (4976500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.4214 6 episodes - episode_reward: -193.317 [-287.370, -148.553] - loss: 17.770 - mae: 84.863 - mean_q: -111.920 Interval 9955 (4977000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3972 6 episodes - episode_reward: -199.193 [-272.230, -155.047] - loss: 11.750 - mae: 84.828 - mean_q: -111.878 Interval 9956 (4977500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1915 10 episodes - episode_reward: -164.398 [-286.198, 30.387] - loss: 13.084 - mae: 84.825 - mean_q: -111.850 Interval 9957 (4978000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3202 7 episodes - episode_reward: -162.923 [-279.059, -25.895] - loss: 15.031 - mae: 84.810 - mean_q: -111.818 Interval 9958 (4978500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8113 8 episodes - episode_reward: -170.335 [-214.005, -131.540] - loss: 13.285 - mae: 84.804 - mean_q: -111.782 Interval 9959 (4979000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3824 10 episodes - episode_reward: -172.073 [-232.391, -100.000] - loss: 16.072 - mae: 84.774 - mean_q: -111.726 Interval 9960 (4979500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6596 8 episodes - episode_reward: -162.544 [-241.924, -27.466] - loss: 13.074 - mae: 84.718 - mean_q: -111.710 Interval 9961 (4980000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8345 8 episodes - episode_reward: -180.102 [-267.499, -143.409] - loss: 12.609 - mae: 84.666 - mean_q: -111.692 Interval 9962 (4980500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.0482 10 episodes - episode_reward: -150.452 [-229.877, -100.000] - loss: 12.475 - mae: 84.644 - mean_q: -111.682 Interval 9963 (4981000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.3841 6 episodes - episode_reward: -202.316 [-269.293, -168.972] - loss: 9.966 - mae: 84.599 - mean_q: -111.674 Interval 9964 (4981500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7846 7 episodes - episode_reward: -209.140 [-297.182, -119.612] - loss: 15.623 - mae: 84.604 - mean_q: -111.660 Interval 9965 (4982000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.6914 6 episodes - episode_reward: -129.715 [-154.731, -74.705] - loss: 11.404 - mae: 84.552 - mean_q: -111.621 Interval 9966 (4982500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5148 8 episodes - episode_reward: -160.891 [-227.145, -65.911] - loss: 9.742 - mae: 84.514 - mean_q: -111.610 Interval 9967 (4983000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9462 8 episodes - episode_reward: -184.068 [-390.440, -125.903] - loss: 12.980 - mae: 84.501 - mean_q: -111.582 Interval 9968 (4983500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9739 8 episodes - episode_reward: -182.807 [-257.971, -105.690] - loss: 13.339 - mae: 84.468 - mean_q: -111.547 Interval 9969 (4984000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.8816: 0s - reward: 7 episodes - episode_reward: -207.769 [-271.659, -171.385] - loss: 10.000 - mae: 84.420 - mean_q: -111.542 Interval 9970 (4984500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6731 8 episodes - episode_reward: -151.657 [-240.407, -61.693] - loss: 12.129 - mae: 84.399 - mean_q: -111.547 Interval 9971 (4985000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7467 10 episodes - episode_reward: -142.615 [-270.308, 18.967] - loss: 11.539 - mae: 84.367 - mean_q: -111.522 Interval 9972 (4985500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.5821 6 episodes - episode_reward: -220.701 [-307.817, -100.908] - loss: 10.699 - mae: 84.327 - mean_q: -111.529 Interval 9973 (4986000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1298 9 episodes - episode_reward: -181.178 [-228.797, -100.000] - loss: 17.379 - mae: 84.343 - mean_q: -111.485 Interval 9974 (4986500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7944 8 episodes - episode_reward: -167.900 [-247.438, -100.000] - loss: 11.626 - mae: 84.297 - mean_q: -111.436 Interval 9975 (4987000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4505 10 episodes - episode_reward: -176.650 [-207.975, -142.731] - loss: 9.964 - mae: 84.263 - mean_q: -111.419 Interval 9976 (4987500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7559 6 episodes - episode_reward: -230.168 [-277.977, -168.737] - loss: 11.574 - mae: 84.245 - mean_q: -111.417 Interval 9977 (4988000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9416 6 episodes - episode_reward: -161.214 [-214.780, -114.944] - loss: 9.494 - mae: 84.231 - mean_q: -111.410 Interval 9978 (4988500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7918 9 episodes - episode_reward: -153.801 [-208.122, -54.660] - loss: 11.437 - mae: 84.205 - mean_q: -111.414 Interval 9979 (4989000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.9174 7 episodes - episode_reward: -204.194 [-261.352, -131.306] - loss: 12.514 - mae: 84.194 - mean_q: -111.412 Interval 9980 (4989500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0361 9 episodes - episode_reward: -166.349 [-212.169, -125.052] - loss: 10.128 - mae: 84.179 - mean_q: -111.397 Interval 9981 (4990000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9065 8 episodes - episode_reward: -183.889 [-220.418, -143.051] - loss: 12.883 - mae: 84.179 - mean_q: -111.381 Interval 9982 (4990500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2029 10 episodes - episode_reward: -163.216 [-215.565, -92.857] - loss: 11.116 - mae: 84.153 - mean_q: -111.357 Interval 9983 (4991000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0470 9 episodes - episode_reward: -168.742 [-272.935, -48.938] - loss: 10.131 - mae: 84.127 - mean_q: -111.359 Interval 9984 (4991500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5983 7 episodes - episode_reward: -177.911 [-238.771, -121.203] - loss: 9.451 - mae: 84.111 - mean_q: -111.357 Interval 9985 (4992000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.0195 8 episodes - episode_reward: -186.931 [-241.942, -105.369] - loss: 12.201 - mae: 84.133 - mean_q: -111.346 Interval 9986 (4992500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4922 8 episodes - episode_reward: -157.433 [-231.209, -47.800] - loss: 14.094 - mae: 84.118 - mean_q: -111.315 Interval 9987 (4993000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.9257 9 episodes - episode_reward: -214.261 [-380.913, -100.000] - loss: 12.742 - mae: 84.107 - mean_q: -111.305 Interval 9988 (4993500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -1.9349 8 episodes - episode_reward: -129.633 [-218.209, 32.426] - loss: 10.098 - mae: 84.076 - mean_q: -111.301 Interval 9989 (4994000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5985 8 episodes - episode_reward: -157.974 [-213.323, -69.153] - loss: 13.128 - mae: 84.079 - mean_q: -111.273 Interval 9990 (4994500 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.9409 8 episodes - episode_reward: -188.038 [-259.745, -122.448] - loss: 12.490 - mae: 84.054 - mean_q: -111.234 Interval 9991 (4995000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.7254 8 episodes - episode_reward: -175.326 [-229.065, -109.367] - loss: 10.692 - mae: 84.024 - mean_q: -111.232 Interval 9992 (4995500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1335 9 episodes - episode_reward: -170.753 [-222.977, -113.455] - loss: 10.910 - mae: 84.024 - mean_q: -111.223 Interval 9993 (4996000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.5330 9 episodes - episode_reward: -142.363 [-184.403, -43.590] - loss: 11.252 - mae: 84.017 - mean_q: -111.208 Interval 9994 (4996500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4306 7 episodes - episode_reward: -175.506 [-225.529, -114.748] - loss: 9.686 - mae: 84.006 - mean_q: -111.198 Interval 9995 (4997000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.2024 9 episodes - episode_reward: -172.531 [-231.109, -100.000] - loss: 11.305 - mae: 84.009 - mean_q: -111.180 Interval 9996 (4997500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.3117 7 episodes - episode_reward: -171.734 [-236.343, -137.477] - loss: 10.052 - mae: 83.999 - mean_q: -111.142 Interval 9997 (4998000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.1103 8 episodes - episode_reward: -189.838 [-302.682, -127.451] - loss: 10.430 - mae: 83.976 - mean_q: -111.156 Interval 9998 (4998500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.6878 8 episodes - episode_reward: -166.109 [-226.441, -133.308] - loss: 10.316 - mae: 83.958 - mean_q: -111.157 Interval 9999 (4999000 steps performed) 500/500 
[==============================] - 2s 4ms/step - reward: -2.6041 7 episodes - episode_reward: -187.017 [-265.281, -145.618] - loss: 12.138 - mae: 83.943 - mean_q: -111.132 Interval 10000 (4999500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9405 done, took 25535.869 seconds
# Record the weights filename and trained agent, then reload the saved
# weights into the stored model.
# NOTE(review): `models` is used as a list here but was initialised as a
# dict near the top of the notebook — presumably reassigned in an unseen
# cell; verify.
weights.append(f'dqn_lunar_weights.h5f')
models.append(dqn)
models[-1].load_weights(weights[-1])
# Wall-clock training duration in seconds (displayed by the bare expression).
training_time = end_time - start_time
training_time
27880.695119857788
# Plot the per-episode reward: raw values in light gray with a
# 50-episode rolling mean overlaid in black.
df = pd.DataFrame(history.history)
ax = df['episode_reward'].plot(color = 'lightgray')
df['episode_reward'].rolling(50).mean().plot(color = 'black')
ax.set_xlabel("Episode")
# Fix: the label previously said "(10)" but the rolling window above is 50.
plt.ylabel("Rolling Mean (50) Cumulative Return")
plt.show()
The model has performed very poorly: the reward has rarely risen above zero, staying firmly in the negative numbers.
# Record Model 2's configuration in the results table.
# NOTE(review): column meanings inferred from the values (window length,
# hidden-layer sizes, nb_steps, memory limit, log interval, result
# placeholder) — confirm against rl's index, which is defined elsewhere.
rl['Model 2'] = [1, '512/256/128', 5000000, 50000, 500, None]
Before adjusting the memory and log parameters, I decided to modify the network architecture slightly, to see whether adding more connections (and complexity) might allow the network to learn better over a long period.
# Fully-connected Q-network: flatten the (1, obs) observation window,
# pass it through three ReLU hidden layers of decreasing width, and emit
# one linear Q-value per action.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
for width in (512, 256, 128):
    model.add(Dense(width))
    model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
Model: "sequential_38" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_37 (Flatten) (None, 8) 0 _________________________________________________________________ dense_148 (Dense) (None, 512) 4608 _________________________________________________________________ activation_148 (Activation) (None, 512) 0 _________________________________________________________________ dense_149 (Dense) (None, 256) 131328 _________________________________________________________________ activation_149 (Activation) (None, 256) 0 _________________________________________________________________ dense_150 (Dense) (None, 128) 32896 _________________________________________________________________ activation_150 (Activation) (None, 128) 0 _________________________________________________________________ dense_151 (Dense) (None, 4) 516 _________________________________________________________________ activation_151 (Activation) (None, 4) 0 ================================================================= Total params: 169,348 Trainable params: 169,348 Non-trainable params: 0 _________________________________________________________________ None
# Replay buffer: window_length=1 feeds single observations to the network.
memory = SequentialMemory(limit=50000, window_length=1)
# Epsilon-greedy exploration (keras-rl's default epsilon schedule).
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=30,
target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
weights_filename = f'dqn_lunar_weights_two.h5f'
checkpoint_weights_filename = 'dqn_lunar_weights_{step}.h5f'
# NOTE(review): this log filename is identical to the previous run's, so
# FileLogger will overwrite the earlier log — confirm this is intended.
log_filename = f'dqn_lunar_log.json'
callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
callbacks += [FileLogger(log_filename, interval=100)]
# Time the full 5M-step training run.
start_time = time.time()
history = dqn.fit(env, callbacks=callbacks, nb_steps=5000000, log_interval=500)
end_time = time.time()
# Store the trained weights/agent and reload the saved weights.
weights.append(f'dqn_lunar_weights_two.h5f')
models.append(dqn)
models[-1].load_weights(weights[-1])
# Plot per-episode reward: raw values in light gray, 50-episode rolling
# mean in black.
df = pd.DataFrame(history.history)
ax = df['episode_reward'].plot(color = 'lightgray')
df['episode_reward'].rolling(50).mean().plot(color = 'black')
ax.set_xlabel("Episode")
# Fix: the label previously said "(10)" but the rolling window above is 50.
plt.ylabel("Rolling Mean (50) Cumulative Return")
plt.show()
It has behaved pretty similarly to the first model. Performance is poor.
There isn't a lot of documentation for Keras-RL, but from what I've understood, the window-length parameter controls how many samples are concatenated to form a "state". I believe setting this to 4 is somewhat equivalent to stacking four images for the CNN. I have reduced training time to 250,000 steps, as it was simply taking too long.
# Record Model 3's configuration in the results table.
# NOTE(review): column meanings inferred from the values (window length,
# hidden-layer sizes, nb_steps, memory limit, log interval, result
# placeholder) — confirm against rl's index, which is defined elsewhere.
rl['Model 3'] = [4, '128/64/32', 250000, 50000, 500, None]
When window size is changed, the model architecture also needs to be slightly adjusted, as the initial "Flatten" layer input-shape, must reflect the window-length.
# Q-network for a window length of 4: the Flatten input shape must match
# the (4, obs) stacked-observation window. Three ReLU hidden layers of
# decreasing width, then one linear Q-value per action.
model = Sequential()
model.add(Flatten(input_shape=(4,) + env.observation_space.shape))
for width in (128, 64, 32):
    model.add(Dense(width))
    model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
Model: "sequential_39" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_38 (Flatten) (None, 32) 0 _________________________________________________________________ dense_152 (Dense) (None, 128) 4224 _________________________________________________________________ activation_152 (Activation) (None, 128) 0 _________________________________________________________________ dense_153 (Dense) (None, 64) 8256 _________________________________________________________________ activation_153 (Activation) (None, 64) 0 _________________________________________________________________ dense_154 (Dense) (None, 32) 2080 _________________________________________________________________ activation_154 (Activation) (None, 32) 0 _________________________________________________________________ dense_155 (Dense) (None, 4) 132 _________________________________________________________________ activation_155 (Activation) (None, 4) 0 ================================================================= Total params: 14,692 Trainable params: 14,692 Non-trainable params: 0 _________________________________________________________________ None
# Replay buffer: window_length=4 stacks four consecutive observations per
# state, matching the (4,) input shape of the network above.
memory = SequentialMemory(limit=50000, window_length=4)
# Epsilon-greedy exploration (keras-rl's default epsilon schedule).
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=30,
target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
weights_filename = f'dqn_lunar_weights_three.h5f'
checkpoint_weights_filename = 'dqn_lunar_weights_{step}.h5f'
# NOTE(review): this log filename is identical to the previous runs', so
# FileLogger will overwrite the earlier logs — confirm this is intended.
log_filename = f'dqn_lunar_log.json'
callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
callbacks += [FileLogger(log_filename, interval=100)]
# Time the (shorter) 250k-step training run.
start_time = time.time()
history = dqn.fit(env, callbacks=callbacks, nb_steps=250000, log_interval=500)
end_time = time.time()
Training for 250000 steps ... Interval 1 (0 steps performed) 1/500 [..............................] - ETA: 53s - reward: 0.5876
/Users/finolacahill/opt/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:2325: UserWarning: `Model.state_updates` will be removed in a future version. This property should not be used in TensorFlow 2.0, as `updates` are applied automatically.
warnings.warn('`Model.state_updates` will be removed in a future version. '
/Users/finolacahill/opt/anaconda3/lib/python3.8/site-packages/rl/memory.py:40: UserWarning: Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!
warnings.warn('Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!')
500/500 [==============================] - 3s 6ms/step - reward: -3.1131 5 episodes - episode_reward: -277.237 [-551.632, -121.857] - loss: 29.763 - mae: 5.930 - mean_q: -3.329 Interval 2 (500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8036 4 episodes - episode_reward: -393.534 [-495.486, -211.179] - loss: 22.463 - mae: 12.092 - mean_q: -9.282 Interval 3 (1000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5818 4 episodes - episode_reward: -270.248 [-552.909, -110.824] - loss: 14.705 - mae: 16.848 - mean_q: -13.583 Interval 4 (1500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2545 4 episodes - episode_reward: -211.766 [-524.698, -46.129] - loss: 9.855 - mae: 22.448 - mean_q: -17.780 Interval 5 (2000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7505 2 episodes - episode_reward: -188.300 [-214.067, -162.533] - loss: 8.709 - mae: 24.761 - mean_q: -17.940 Interval 6 (2500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4816 1 episodes - episode_reward: -252.139 [-252.139, -252.139] - loss: 12.090 - mae: 25.250 - mean_q: -14.712 Interval 7 (3000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1444 Interval 8 (3500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9881 2 episodes - episode_reward: -272.333 [-440.025, -104.642] - loss: 9.046 - mae: 25.744 - mean_q: -12.884 Interval 9 (4000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4193 1 episodes - episode_reward: -137.796 [-137.796, -137.796] - loss: 10.107 - mae: 26.437 - mean_q: -10.514 Interval 10 (4500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3157 1 episodes - episode_reward: -286.880 [-286.880, -286.880] - loss: 9.718 - mae: 26.068 - mean_q: -7.373 Interval 11 (5000 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8944 4 episodes - episode_reward: -98.485 [-131.598, -40.177] - loss: 6.935 - mae: 25.943 - mean_q: -2.513 Interval 12 (5500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1878 Interval 13 (6000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9112 5 episodes - episode_reward: -72.525 [-180.517, 200.029] - loss: 8.220 - mae: 26.363 - mean_q: 1.407 Interval 14 (6500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4799 1 episodes - episode_reward: -240.509 [-240.509, -240.509] - loss: 6.690 - mae: 26.268 - mean_q: 2.956 Interval 15 (7000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5536 2 episodes - episode_reward: -107.394 [-139.748, -75.039] - loss: 10.117 - mae: 26.894 - mean_q: 4.249 Interval 16 (7500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5895 2 episodes - episode_reward: -163.684 [-164.016, -163.351] - loss: 6.888 - mae: 27.603 - mean_q: 6.616 Interval 17 (8000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9128 3 episodes - episode_reward: -162.999 [-274.892, -104.751] - loss: 9.328 - mae: 27.888 - mean_q: 8.003 Interval 18 (8500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0037 1 episodes - episode_reward: -28.730 [-28.730, -28.730] - loss: 7.542 - mae: 28.723 - mean_q: 8.897 Interval 19 (9000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0132 Interval 20 (9500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0257 Interval 21 (10000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2593 1 episodes - episode_reward: 116.969 [116.969, 116.969] - loss: 7.356 - mae: 28.882 - mean_q: 8.492 Interval 22 
(10500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0130 Interval 23 (11000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2197 Interval 24 (11500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0908 Interval 25 (12000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2451 Interval 26 (12500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2013 Interval 27 (13000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1528 Interval 28 (13500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1814 Interval 29 (14000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1420 Interval 30 (14500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1552 Interval 31 (15000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2003 Interval 32 (15500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1646 Interval 33 (16000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1282 Interval 34 (16500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1882 Interval 35 (17000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1604 Interval 36 (17500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1882 Interval 37 (18000 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.1857 Interval 38 (18500 steps performed) 500/500 [==============================] - 10s 19ms/step - reward: -0.1829 Interval 39 (19000 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.1694 Interval 40 (19500 steps performed) 
500/500 [==============================] - 11s 22ms/step - reward: -0.1948 Interval 41 (20000 steps performed) 500/500 [==============================] - 11s 23ms/step - reward: -0.1723 Interval 42 (20500 steps performed) 500/500 [==============================] - 11s 22ms/step - reward: -0.1799 Interval 43 (21000 steps performed) 500/500 [==============================] - 12s 23ms/step - reward: -0.1952 Interval 44 (21500 steps performed) 500/500 [==============================] - 12s 25ms/step - reward: -0.2051 Interval 45 (22000 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -0.1376 Interval 46 (22500 steps performed) 500/500 [==============================] - 14s 27ms/step - reward: -0.1646 Interval 47 (23000 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: 0.0289 Interval 48 (23500 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.2752 Interval 49 (24000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.0134 3 episodes - episode_reward: -734.265 [-2073.943, -51.252] - loss: 5.594 - mae: 27.718 - mean_q: 32.864 Interval 50 (24500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0628 Interval 51 (25000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8406 2 episodes - episode_reward: 204.126 [161.361, 246.890] - loss: 8.043 - mae: 28.613 - mean_q: 34.437 Interval 52 (25500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4767 2 episodes - episode_reward: -111.789 [-139.374, -84.204] - loss: 6.394 - mae: 28.884 - mean_q: 34.464 Interval 53 (26000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2908 1 episodes - episode_reward: 162.989 [162.989, 162.989] - loss: 7.545 - mae: 29.177 - mean_q: 34.824 Interval 54 (26500 steps performed) 500/500 [==============================] - 3s 
6ms/step - reward: -0.5450 2 episodes - episode_reward: -136.610 [-146.289, -126.931] - loss: 7.561 - mae: 29.294 - mean_q: 35.013 Interval 55 (27000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1893 2 episodes - episode_reward: 51.047 [-131.685, 233.778] - loss: 6.395 - mae: 29.376 - mean_q: 35.191 Interval 56 (27500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0227 Interval 57 (28000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1639 Interval 58 (28500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1209 Interval 59 (29000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.3433 1 episodes - episode_reward: -17.467 [-17.467, -17.467] - loss: 6.062 - mae: 30.035 - mean_q: 36.123 Interval 60 (29500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0446 Interval 61 (30000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1161 Interval 62 (30500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.1262 1 episodes - episode_reward: 91.759 [91.759, 91.759] - loss: 6.499 - mae: 29.913 - mean_q: 36.392 Interval 63 (31000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0447 Interval 64 (31500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0015 Interval 65 (32000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2771 1 episodes - episode_reward: 33.632 [33.632, 33.632] - loss: 6.342 - mae: 29.786 - mean_q: 36.522 Interval 66 (32500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2713 1 episodes - episode_reward: 170.822 [170.822, 170.822] - loss: 5.472 - mae: 29.619 - mean_q: 36.628 Interval 67 (33000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: 0.1162 Interval 68 (33500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3693 1 episodes - episode_reward: 160.255 [160.255, 160.255] - loss: 4.446 - mae: 29.739 - mean_q: 37.179 Interval 69 (34000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2582 3 episodes - episode_reward: -34.943 [-261.324, 211.771] - loss: 6.728 - mae: 30.063 - mean_q: 37.593 Interval 70 (34500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1164 Interval 71 (35000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1319 Interval 72 (35500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0206 Interval 73 (36000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3348 1 episodes - episode_reward: 45.239 [45.239, 45.239] - loss: 5.057 - mae: 29.618 - mean_q: 37.025 Interval 74 (36500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2485 1 episodes - episode_reward: 159.249 [159.249, 159.249] - loss: 4.810 - mae: 29.699 - mean_q: 37.117 Interval 75 (37000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3042 Interval 76 (37500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0845 3 episodes - episode_reward: 37.271 [-182.743, 274.196] - loss: 5.697 - mae: 30.025 - mean_q: 37.394 Interval 77 (38000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0313 Interval 78 (38500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0154 Interval 79 (39000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2140 2 episodes - episode_reward: -12.120 [-112.809, 88.569] - loss: 6.614 - mae: 30.569 - mean_q: 37.554 Interval 80 (39500 steps 
performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2140 2 episodes - episode_reward: 107.370 [-53.046, 267.785] - loss: 5.191 - mae: 30.570 - mean_q: 37.300 Interval 81 (40000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3949 1 episodes - episode_reward: -208.628 [-208.628, -208.628] - loss: 3.923 - mae: 30.841 - mean_q: 37.607 Interval 82 (40500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0839 1 episodes - episode_reward: -32.316 [-32.316, -32.316] - loss: 4.369 - mae: 31.007 - mean_q: 37.998 Interval 83 (41000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1470 1 episodes - episode_reward: 203.854 [203.854, 203.854] - loss: 4.921 - mae: 30.743 - mean_q: 37.351 Interval 84 (41500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1515 1 episodes - episode_reward: -182.975 [-182.975, -182.975] - loss: 4.585 - mae: 30.758 - mean_q: 37.442 Interval 85 (42000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4040 1 episodes - episode_reward: 224.799 [224.799, 224.799] - loss: 8.253 - mae: 30.334 - mean_q: 37.178 Interval 86 (42500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0304 Interval 87 (43000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3265 2 episodes - episode_reward: 84.222 [5.197, 163.248] - loss: 7.492 - mae: 30.252 - mean_q: 36.903 Interval 88 (43500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0163 Interval 89 (44000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0050 Interval 90 (44500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0637 Interval 91 (45000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1152 2 episodes - 
episode_reward: 10.132 [-123.442, 143.707] - loss: 5.359 - mae: 30.033 - mean_q: 37.173 Interval 92 (45500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1572 Interval 93 (46000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3008 1 episodes - episode_reward: 239.358 [239.358, 239.358] - loss: 5.372 - mae: 30.045 - mean_q: 37.224 Interval 94 (46500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5045 1 episodes - episode_reward: 158.784 [158.784, 158.784] - loss: 5.021 - mae: 30.503 - mean_q: 37.811 Interval 95 (47000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2207 2 episodes - episode_reward: -31.660 [-344.376, 281.056] - loss: 6.027 - mae: 30.404 - mean_q: 37.385 Interval 96 (47500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0032 2 episodes - episode_reward: 27.785 [-107.926, 163.496] - loss: 6.562 - mae: 30.482 - mean_q: 37.230 Interval 97 (48000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3038 1 episodes - episode_reward: 192.096 [192.096, 192.096] - loss: 5.517 - mae: 30.489 - mean_q: 37.558 Interval 98 (48500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3951 1 episodes - episode_reward: 191.981 [191.981, 191.981] - loss: 7.670 - mae: 30.496 - mean_q: 37.356 Interval 99 (49000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2359 Interval 100 (49500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4134 1 episodes - episode_reward: 224.956 [224.956, 224.956] - loss: 5.655 - mae: 30.220 - mean_q: 37.599 Interval 101 (50000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1919 1 episodes - episode_reward: 203.954 [203.954, 203.954] - loss: 4.374 - mae: 30.322 - mean_q: 37.804 Interval 102 
(50500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1364 2 episodes - episode_reward: 37.885 [-124.363, 200.133] - loss: 8.699 - mae: 30.158 - mean_q: 38.107 Interval 103 (51000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1373 Interval 104 (51500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.8564 2 episodes - episode_reward: 235.754 [177.145, 294.364] - loss: 5.743 - mae: 29.850 - mean_q: 38.486 Interval 105 (52000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2187 1 episodes - episode_reward: -46.078 [-46.078, -46.078] - loss: 5.914 - mae: 29.626 - mean_q: 38.383 Interval 106 (52500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1580 1 episodes - episode_reward: 129.008 [129.008, 129.008] - loss: 5.268 - mae: 29.649 - mean_q: 38.401 Interval 107 (53000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1094 1 episodes - episode_reward: -98.409 [-98.409, -98.409] - loss: 7.808 - mae: 30.047 - mean_q: 38.801 Interval 108 (53500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0410 Interval 109 (54000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5897 4 episodes - episode_reward: -51.727 [-252.992, 182.757] - loss: 7.441 - mae: 29.625 - mean_q: 38.330 Interval 110 (54500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2741 Interval 111 (55000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3299 1 episodes - episode_reward: 212.929 [212.929, 212.929] - loss: 7.941 - mae: 29.553 - mean_q: 38.423 Interval 112 (55500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2623 1 episodes - episode_reward: 183.684 [183.684, 183.684] - loss: 5.590 - mae: 29.441 - mean_q: 38.329 
Interval 113 (56000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7718 1 episodes - episode_reward: -259.318 [-259.318, -259.318] - loss: 4.278 - mae: 29.183 - mean_q: 37.696 Interval 114 (56500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3935 2 episodes - episode_reward: 38.905 [-127.233, 205.043] - loss: 5.826 - mae: 29.321 - mean_q: 37.538 Interval 115 (57000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1831 Interval 116 (57500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5681 2 episodes - episode_reward: 193.382 [177.488, 209.276] - loss: 4.444 - mae: 29.274 - mean_q: 38.333 Interval 117 (58000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3197 1 episodes - episode_reward: 154.661 [154.661, 154.661] - loss: 5.581 - mae: 29.013 - mean_q: 37.889 Interval 118 (58500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4255 2 episodes - episode_reward: 98.453 [-100.000, 296.906] - loss: 6.684 - mae: 29.421 - mean_q: 38.815 Interval 119 (59000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4922 1 episodes - episode_reward: 240.972 [240.972, 240.972] - loss: 7.628 - mae: 29.002 - mean_q: 37.952 Interval 120 (59500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3264 4 episodes - episode_reward: -39.498 [-100.000, -1.588] - loss: 6.172 - mae: 29.039 - mean_q: 38.219 Interval 121 (60000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0678 Interval 122 (60500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2692 Interval 123 (61000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3196 1 episodes - episode_reward: -321.600 [-321.600, -321.600] - loss: 4.816 - mae: 29.720 
- mean_q: 39.098 Interval 124 (61500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2328 1 episodes - episode_reward: 187.115 [187.115, 187.115] - loss: 7.280 - mae: 30.146 - mean_q: 39.373 Interval 125 (62000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3724 1 episodes - episode_reward: -428.892 [-428.892, -428.892] - loss: 9.247 - mae: 31.175 - mean_q: 40.854 Interval 126 (62500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1180 1 episodes - episode_reward: 124.738 [124.738, 124.738] - loss: 5.098 - mae: 32.034 - mean_q: 42.173 Interval 127 (63000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0835 Interval 128 (63500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2385 2 episodes - episode_reward: -95.864 [-277.384, 85.657] - loss: 5.521 - mae: 33.176 - mean_q: 43.608 Interval 129 (64000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1966 2 episodes - episode_reward: 33.037 [-138.272, 204.346] - loss: 6.804 - mae: 33.997 - mean_q: 44.566 Interval 130 (64500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3064 1 episodes - episode_reward: 277.333 [277.333, 277.333] - loss: 6.514 - mae: 34.765 - mean_q: 45.395 Interval 131 (65000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0488 1 episodes - episode_reward: -58.624 [-58.624, -58.624] - loss: 5.843 - mae: 35.260 - mean_q: 46.143 Interval 132 (65500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0570 Interval 133 (66000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0418 Interval 134 (66500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0370 Interval 135 (67000 steps performed) 500/500 
[==============================] - 4s 9ms/step - reward: -0.0389 Interval 136 (67500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0881 2 episodes - episode_reward: -63.808 [-204.707, 77.091] - loss: 7.397 - mae: 37.461 - mean_q: 49.179 Interval 137 (68000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4327 5 episodes - episode_reward: -29.537 [-121.522, 251.052] - loss: 4.435 - mae: 37.864 - mean_q: 49.764 Interval 138 (68500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0379 1 episodes - episode_reward: -148.392 [-148.392, -148.392] - loss: 7.213 - mae: 38.270 - mean_q: 50.085 Interval 139 (69000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4520 1 episodes - episode_reward: 233.022 [233.022, 233.022] - loss: 8.403 - mae: 38.537 - mean_q: 50.110 Interval 140 (69500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1908 1 episodes - episode_reward: 250.135 [250.135, 250.135] - loss: 5.361 - mae: 38.845 - mean_q: 51.080 Interval 141 (70000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3095 3 episodes - episode_reward: -106.399 [-291.383, 100.358] - loss: 8.232 - mae: 39.058 - mean_q: 51.015 Interval 142 (70500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0921 2 episodes - episode_reward: 28.300 [-175.045, 231.645] - loss: 6.500 - mae: 39.351 - mean_q: 51.334 Interval 143 (71000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3132 1 episodes - episode_reward: 205.191 [205.191, 205.191] - loss: 8.263 - mae: 39.692 - mean_q: 51.397 Interval 144 (71500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4628 1 episodes - episode_reward: 146.052 [146.052, 146.052] - loss: 7.787 - mae: 40.218 - mean_q: 52.074 Interval 145 (72000 steps 
performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1150 Interval 146 (72500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1448 1 episodes - episode_reward: 223.931 [223.931, 223.931] - loss: 7.928 - mae: 41.177 - mean_q: 53.763 Interval 147 (73000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1071 Interval 148 (73500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2080 1 episodes - episode_reward: 143.337 [143.337, 143.337] - loss: 7.224 - mae: 41.504 - mean_q: 53.781 Interval 149 (74000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3249 1 episodes - episode_reward: 117.515 [117.515, 117.515] - loss: 6.650 - mae: 41.755 - mean_q: 53.809 Interval 150 (74500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3679 1 episodes - episode_reward: 276.315 [276.315, 276.315] - loss: 4.866 - mae: 42.081 - mean_q: 54.594 Interval 151 (75000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1580 1 episodes - episode_reward: -28.735 [-28.735, -28.735] - loss: 9.018 - mae: 42.336 - mean_q: 54.888 Interval 152 (75500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0442 Interval 153 (76000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2971 1 episodes - episode_reward: 165.882 [165.882, 165.882] - loss: 7.465 - mae: 42.084 - mean_q: 54.617 Interval 154 (76500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1743 2 episodes - episode_reward: 49.958 [-115.214, 215.130] - loss: 4.497 - mae: 42.237 - mean_q: 55.259 Interval 155 (77000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0495 Interval 156 (77500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2395 1 episodes 
- episode_reward: 162.225 [162.225, 162.225] - loss: 7.072 - mae: 41.924 - mean_q: 55.209 Interval 157 (78000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5514 1 episodes - episode_reward: 306.070 [306.070, 306.070] - loss: 6.713 - mae: 42.074 - mean_q: 55.212 Interval 158 (78500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0662 Interval 159 (79000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2686 1 episodes - episode_reward: 121.202 [121.202, 121.202] - loss: 7.552 - mae: 42.170 - mean_q: 55.268 Interval 160 (79500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0376 1 episodes - episode_reward: -82.087 [-82.087, -82.087] - loss: 7.735 - mae: 42.608 - mean_q: 55.812 Interval 161 (80000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0068 Interval 162 (80500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2614 1 episodes - episode_reward: 197.215 [197.215, 197.215] - loss: 6.045 - mae: 42.592 - mean_q: 55.665 Interval 163 (81000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1368 1 episodes - episode_reward: -51.980 [-51.980, -51.980] - loss: 7.262 - mae: 42.373 - mean_q: 55.233 Interval 164 (81500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5682 2 episodes - episode_reward: -125.088 [-157.236, -92.940] - loss: 9.093 - mae: 42.358 - mean_q: 55.054 Interval 165 (82000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3303 1 episodes - episode_reward: 90.542 [90.542, 90.542] - loss: 6.076 - mae: 42.533 - mean_q: 55.603 Interval 166 (82500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5205 1 episodes - episode_reward: 204.005 [204.005, 204.005] - loss: 10.928 - mae: 42.386 - mean_q: 55.525 
Interval 167 (83000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3604 1 episodes - episode_reward: 211.339 [211.339, 211.339] - loss: 5.974 - mae: 42.532 - mean_q: 55.798 Interval 168 (83500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2038 1 episodes - episode_reward: 156.672 [156.672, 156.672] - loss: 8.440 - mae: 42.189 - mean_q: 55.194 Interval 169 (84000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1183 Interval 170 (84500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0652 Interval 171 (85000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0534 Interval 172 (85500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2355 1 episodes - episode_reward: 125.996 [125.996, 125.996] - loss: 6.684 - mae: 42.036 - mean_q: 54.906 Interval 173 (86000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2701 1 episodes - episode_reward: -101.195 [-101.195, -101.195] - loss: 9.799 - mae: 42.176 - mean_q: 55.048 Interval 174 (86500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4977 1 episodes - episode_reward: 179.708 [179.708, 179.708] - loss: 6.558 - mae: 42.493 - mean_q: 55.500 Interval 175 (87000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1164 Interval 176 (87500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3105 3 episodes - episode_reward: -23.162 [-161.606, 211.486] - loss: 4.244 - mae: 42.309 - mean_q: 55.541 Interval 177 (88000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1678 1 episodes - episode_reward: 116.980 [116.980, 116.980] - loss: 9.403 - mae: 42.346 - mean_q: 55.005 Interval 178 (88500 steps performed) 500/500 [==============================] - 3s 
7ms/step - reward: -0.1516 1 episodes - episode_reward: -113.283 [-113.283, -113.283] - loss: 8.596 - mae: 42.408 - mean_q: 55.376 Interval 179 (89000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5395 1 episodes - episode_reward: 180.328 [180.328, 180.328] - loss: 8.506 - mae: 42.573 - mean_q: 55.558 Interval 180 (89500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5612 1 episodes - episode_reward: 223.322 [223.322, 223.322] - loss: 7.341 - mae: 42.374 - mean_q: 55.428 Interval 181 (90000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0343 2 episodes - episode_reward: 13.533 [-243.989, 271.054] - loss: 9.192 - mae: 42.560 - mean_q: 55.579 Interval 182 (90500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4237 1 episodes - episode_reward: 232.231 [232.231, 232.231] - loss: 8.107 - mae: 42.494 - mean_q: 55.566 Interval 183 (91000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.7762 2 episodes - episode_reward: 245.467 [212.865, 278.070] - loss: 6.032 - mae: 42.566 - mean_q: 56.037 Interval 184 (91500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0090 Interval 185 (92000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5727 3 episodes - episode_reward: -99.288 [-312.994, 138.675] - loss: 7.483 - mae: 42.228 - mean_q: 55.141 Interval 186 (92500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6817 1 episodes - episode_reward: 256.212 [256.212, 256.212] - loss: 7.115 - mae: 42.734 - mean_q: 55.746 Interval 187 (93000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3365 1 episodes - episode_reward: 215.586 [215.586, 215.586] - loss: 6.813 - mae: 42.410 - mean_q: 55.173 Interval 188 (93500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -0.0966 Interval 189 (94000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4287 1 episodes - episode_reward: 105.203 [105.203, 105.203] - loss: 8.321 - mae: 42.425 - mean_q: 55.180 Interval 190 (94500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2601 3 episodes - episode_reward: -13.766 [-171.591, 230.293] - loss: 7.357 - mae: 42.389 - mean_q: 55.137 Interval 191 (95000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0219 2 episodes - episode_reward: 18.413 [-232.662, 269.488] - loss: 9.241 - mae: 42.785 - mean_q: 55.797 Interval 192 (95500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0200 1 episodes - episode_reward: -100.000 [-100.000, -100.000] - loss: 5.557 - mae: 42.873 - mean_q: 55.918 Interval 193 (96000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2786 1 episodes - episode_reward: 223.088 [223.088, 223.088] - loss: 6.143 - mae: 43.138 - mean_q: 56.109 Interval 194 (96500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6819 3 episodes - episode_reward: 126.442 [-110.820, 272.516] - loss: 8.091 - mae: 43.026 - mean_q: 56.058 Interval 195 (97000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3555 2 episodes - episode_reward: 62.967 [-141.488, 267.423] - loss: 9.772 - mae: 42.976 - mean_q: 56.007 Interval 196 (97500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1997 1 episodes - episode_reward: 147.098 [147.098, 147.098] - loss: 10.732 - mae: 43.211 - mean_q: 56.663 Interval 197 (98000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1525 Interval 198 (98500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3581 1 episodes - episode_reward: 
165.593 [165.593, 165.593] - loss: 9.504 - mae: 43.274 - mean_q: 56.852 Interval 199 (99000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5607 1 episodes - episode_reward: 216.428 [216.428, 216.428] - loss: 5.996 - mae: 43.107 - mean_q: 56.498 Interval 200 (99500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6398 3 episodes - episode_reward: -72.390 [-353.372, 253.595] - loss: 5.976 - mae: 43.161 - mean_q: 56.547 Interval 201 (100000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7929 2 episodes - episode_reward: -194.407 [-287.722, -101.091] - loss: 7.794 - mae: 43.699 - mean_q: 57.446 Interval 202 (100500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2801 1 episodes - episode_reward: 209.445 [209.445, 209.445] - loss: 9.482 - mae: 43.422 - mean_q: 56.790 Interval 203 (101000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0276 2 episodes - episode_reward: -30.527 [-46.219, -14.835] - loss: 7.251 - mae: 44.015 - mean_q: 57.647 Interval 204 (101500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6170 1 episodes - episode_reward: 216.305 [216.305, 216.305] - loss: 11.380 - mae: 44.044 - mean_q: 57.280 Interval 205 (102000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2700 1 episodes - episode_reward: 241.237 [241.237, 241.237] - loss: 10.355 - mae: 43.721 - mean_q: 56.768 Interval 206 (102500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2859 1 episodes - episode_reward: 222.410 [222.410, 222.410] - loss: 8.861 - mae: 43.438 - mean_q: 56.792 Interval 207 (103000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3837 2 episodes - episode_reward: -191.755 [-283.510, -100.000] - loss: 10.866 - mae: 43.456 - mean_q: 56.553 Interval 208 
(103500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8049 3 episodes - episode_reward: -91.424 [-425.835, 219.819] - loss: 10.131 - mae: 43.410 - mean_q: 56.237 Interval 209 (104000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6321 1 episodes - episode_reward: 241.667 [241.667, 241.667] - loss: 7.278 - mae: 43.556 - mean_q: 56.661 Interval 210 (104500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5669 1 episodes - episode_reward: -201.583 [-201.583, -201.583] - loss: 13.689 - mae: 43.196 - mean_q: 56.305 Interval 211 (105000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6695 1 episodes - episode_reward: 182.699 [182.699, 182.699] - loss: 8.575 - mae: 43.046 - mean_q: 56.242 Interval 212 (105500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2511 2 episodes - episode_reward: 124.656 [-25.716, 275.029] - loss: 9.064 - mae: 42.861 - mean_q: 55.597 Interval 213 (106000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0635 1 episodes - episode_reward: -48.148 [-48.148, -48.148] - loss: 9.041 - mae: 42.846 - mean_q: 55.745 Interval 214 (106500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3371 1 episodes - episode_reward: 230.403 [230.403, 230.403] - loss: 8.151 - mae: 42.426 - mean_q: 55.343 Interval 215 (107000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5203 1 episodes - episode_reward: 228.457 [228.457, 228.457] - loss: 7.507 - mae: 42.735 - mean_q: 55.805 Interval 216 (107500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0442 Interval 217 (108000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1246 1 episodes - episode_reward: 104.419 [104.419, 104.419] - loss: 9.813 - mae: 42.782 - mean_q: 
55.551 Interval 218 (108500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3660 1 episodes - episode_reward: 194.938 [194.938, 194.938] - loss: 9.174 - mae: 42.530 - mean_q: 55.174 Interval 219 (109000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0704 Interval 220 (109500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.8634 2 episodes - episode_reward: 175.182 [113.095, 237.269] - loss: 8.542 - mae: 41.780 - mean_q: 54.351 Interval 221 (110000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.7594 2 episodes - episode_reward: 236.828 [212.601, 261.056] - loss: 11.942 - mae: 42.017 - mean_q: 54.893 Interval 222 (110500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7200 1 episodes - episode_reward: 274.189 [274.189, 274.189] - loss: 11.803 - mae: 42.349 - mean_q: 54.934 Interval 223 (111000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0788 2 episodes - episode_reward: 41.113 [-104.799, 187.025] - loss: 10.601 - mae: 41.943 - mean_q: 54.846 Interval 224 (111500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3640 1 episodes - episode_reward: 208.049 [208.049, 208.049] - loss: 8.598 - mae: 42.328 - mean_q: 54.922 Interval 225 (112000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0865 Interval 226 (112500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3896 1 episodes - episode_reward: 177.103 [177.103, 177.103] - loss: 7.434 - mae: 42.137 - mean_q: 54.834 Interval 227 (113000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4537 1 episodes - episode_reward: 226.054 [226.054, 226.054] - loss: 7.887 - mae: 42.763 - mean_q: 55.711 Interval 228 (113500 steps performed) 500/500 [==============================] - 
3s 7ms/step - reward: 0.2699 Interval 229 (114000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2761 1 episodes - episode_reward: 309.375 [309.375, 309.375] - loss: 9.127 - mae: 43.434 - mean_q: 56.779 Interval 230 (114500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4962 2 episodes - episode_reward: 166.820 [63.316, 270.324] - loss: 8.143 - mae: 43.757 - mean_q: 57.119 Interval 231 (115000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6601 1 episodes - episode_reward: 159.872 [159.872, 159.872] - loss: 8.437 - mae: 44.299 - mean_q: 57.695 Interval 232 (115500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3386 1 episodes - episode_reward: 257.313 [257.313, 257.313] - loss: 8.869 - mae: 44.584 - mean_q: 58.302 Interval 233 (116000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0162 Interval 234 (116500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3665 1 episodes - episode_reward: 132.105 [132.105, 132.105] - loss: 7.879 - mae: 44.810 - mean_q: 58.917 Interval 235 (117000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1929 1 episodes - episode_reward: 63.074 [63.074, 63.074] - loss: 10.336 - mae: 45.552 - mean_q: 59.854 Interval 236 (117500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0478 3 episodes - episode_reward: 4.692 [-119.983, 234.059] - loss: 8.609 - mae: 45.767 - mean_q: 60.236 Interval 237 (118000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1219 1 episodes - episode_reward: 239.339 [239.339, 239.339] - loss: 7.738 - mae: 45.401 - mean_q: 59.686 Interval 238 (118500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4198 1 episodes - episode_reward: 179.406 [179.406, 179.406] - loss: 
9.238 - mae: 45.766 - mean_q: 60.256 Interval 239 (119000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0860 Interval 240 (119500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4730 1 episodes - episode_reward: 169.532 [169.532, 169.532] - loss: 6.894 - mae: 45.714 - mean_q: 59.895 Interval 241 (120000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0318 Interval 242 (120500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4606 1 episodes - episode_reward: 228.357 [228.357, 228.357] - loss: 7.535 - mae: 45.504 - mean_q: 59.976 Interval 243 (121000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0751 2 episodes - episode_reward: 13.949 [-204.036, 231.935] - loss: 7.598 - mae: 45.582 - mean_q: 59.839 Interval 244 (121500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5155 1 episodes - episode_reward: 260.387 [260.387, 260.387] - loss: 6.236 - mae: 45.245 - mean_q: 59.532 Interval 245 (122000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0332 3 episodes - episode_reward: 26.991 [-129.360, 234.739] - loss: 6.287 - mae: 44.961 - mean_q: 59.186 Interval 246 (122500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1023 2 episodes - episode_reward: 60.871 [-91.511, 213.252] - loss: 7.793 - mae: 45.145 - mean_q: 59.019 Interval 247 (123000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3461 1 episodes - episode_reward: 209.120 [209.120, 209.120] - loss: 10.174 - mae: 44.893 - mean_q: 58.884 Interval 248 (123500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5682 4 episodes - episode_reward: -76.335 [-436.461, 257.169] - loss: 9.286 - mae: 44.923 - mean_q: 58.838 Interval 249 (124000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.1417 Interval 250 (124500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1921 2 episodes - episode_reward: -49.377 [-247.663, 148.909] - loss: 13.948 - mae: 44.640 - mean_q: 58.399 Interval 251 (125000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1530 Interval 252 (125500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6114 2 episodes - episode_reward: 225.895 [222.696, 229.094] - loss: 8.839 - mae: 44.766 - mean_q: 58.671 Interval 253 (126000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5966 1 episodes - episode_reward: 157.409 [157.409, 157.409] - loss: 10.717 - mae: 44.787 - mean_q: 58.441 Interval 254 (126500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5522 1 episodes - episode_reward: 227.413 [227.413, 227.413] - loss: 13.958 - mae: 45.268 - mean_q: 59.151 Interval 255 (127000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2557 2 episodes - episode_reward: 145.170 [16.742, 273.597] - loss: 8.264 - mae: 45.049 - mean_q: 58.569 Interval 256 (127500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5258 1 episodes - episode_reward: 201.169 [201.169, 201.169] - loss: 12.130 - mae: 45.058 - mean_q: 58.731 Interval 257 (128000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2336 1 episodes - episode_reward: 180.690 [180.690, 180.690] - loss: 7.149 - mae: 44.891 - mean_q: 58.470 Interval 258 (128500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3753 2 episodes - episode_reward: -83.277 [-117.162, -49.391] - loss: 11.980 - mae: 44.705 - mean_q: 57.831 Interval 259 (129000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3070 1 episodes - 
episode_reward: 132.780 [132.780, 132.780] - loss: 8.091 - mae: 44.403 - mean_q: 57.703 Interval 260 (129500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1374 Interval 261 (130000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2286 Interval 262 (130500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3492 1 episodes - episode_reward: 13.366 [13.366, 13.366] - loss: 7.715 - mae: 44.235 - mean_q: 57.833 Interval 263 (131000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0219 Interval 264 (131500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1337 1 episodes - episode_reward: 118.998 [118.998, 118.998] - loss: 8.511 - mae: 43.628 - mean_q: 56.752 Interval 265 (132000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1327 1 episodes - episode_reward: -304.698 [-304.698, -304.698] - loss: 8.812 - mae: 43.602 - mean_q: 56.763 Interval 266 (132500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3062 1 episodes - episode_reward: 135.946 [135.946, 135.946] - loss: 10.098 - mae: 43.591 - mean_q: 56.881 Interval 267 (133000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2041 1 episodes - episode_reward: 158.282 [158.282, 158.282] - loss: 9.719 - mae: 43.526 - mean_q: 56.669 Interval 268 (133500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5997 1 episodes - episode_reward: -251.791 [-251.791, -251.791] - loss: 9.834 - mae: 43.453 - mean_q: 56.622 Interval 269 (134000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0359 Interval 270 (134500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0089 Interval 271 (135000 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: 0.5921 2 episodes - episode_reward: 4.495 [-30.798, 39.788] - loss: 10.210 - mae: 43.498 - mean_q: 56.847 Interval 272 (135500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2228 1 episodes - episode_reward: 326.697 [326.697, 326.697] - loss: 9.154 - mae: 44.110 - mean_q: 57.617 Interval 273 (136000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2741 2 episodes - episode_reward: -118.307 [-120.516, -116.098] - loss: 11.957 - mae: 44.054 - mean_q: 57.555 Interval 274 (136500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4992 1 episodes - episode_reward: 213.138 [213.138, 213.138] - loss: 9.748 - mae: 44.078 - mean_q: 57.698 Interval 275 (137000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0761 2 episodes - episode_reward: 57.557 [-153.558, 268.673] - loss: 9.858 - mae: 44.554 - mean_q: 58.052 Interval 276 (137500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1400 Interval 277 (138000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2161 2 episodes - episode_reward: 62.480 [-39.306, 164.265] - loss: 9.952 - mae: 44.754 - mean_q: 58.519 Interval 278 (138500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3138 1 episodes - episode_reward: 140.645 [140.645, 140.645] - loss: 11.638 - mae: 44.747 - mean_q: 58.680 Interval 279 (139000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1475 3 episodes - episode_reward: 37.272 [-107.772, 282.729] - loss: 10.439 - mae: 45.223 - mean_q: 59.349 Interval 280 (139500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3633 1 episodes - episode_reward: 200.741 [200.741, 200.741] - loss: 9.158 - mae: 45.064 - mean_q: 59.191 Interval 281 (140000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.1603 Interval 282 (140500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1009 1 episodes - episode_reward: 159.748 [159.748, 159.748] - loss: 7.685 - mae: 44.993 - mean_q: 58.703 Interval 283 (141000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1937 Interval 284 (141500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5416 3 episodes - episode_reward: -90.804 [-303.550, 179.597] - loss: 8.866 - mae: 45.103 - mean_q: 58.433 Interval 285 (142000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2083 2 episodes - episode_reward: 70.155 [-114.158, 254.467] - loss: 9.801 - mae: 45.313 - mean_q: 59.095 Interval 286 (142500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4242 1 episodes - episode_reward: 168.071 [168.071, 168.071] - loss: 10.144 - mae: 44.831 - mean_q: 58.635 Interval 287 (143000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2781 2 episodes - episode_reward: 82.472 [-100.153, 265.097] - loss: 10.164 - mae: 44.939 - mean_q: 58.492 Interval 288 (143500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1112 2 episodes - episode_reward: -10.833 [-138.683, 117.017] - loss: 7.156 - mae: 45.205 - mean_q: 59.130 Interval 289 (144000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5004 1 episodes - episode_reward: 224.007 [224.007, 224.007] - loss: 8.328 - mae: 45.436 - mean_q: 59.388 Interval 290 (144500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4651 1 episodes - episode_reward: -203.136 [-203.136, -203.136] - loss: 11.991 - mae: 45.269 - mean_q: 59.100 Interval 291 (145000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0923 1 episodes - 
episode_reward: -37.738 [-37.738, -37.738] - loss: 7.584 - mae: 45.545 - mean_q: 59.875 Interval 292 (145500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4609 1 episodes - episode_reward: 204.596 [204.596, 204.596] - loss: 7.495 - mae: 45.679 - mean_q: 59.790 Interval 293 (146000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0536 2 episodes - episode_reward: 45.060 [-158.083, 248.204] - loss: 7.125 - mae: 46.151 - mean_q: 60.493 Interval 294 (146500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0943 Interval 295 (147000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1996 2 episodes - episode_reward: 57.522 [-98.376, 213.421] - loss: 10.901 - mae: 45.770 - mean_q: 59.669 Interval 296 (147500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4455 1 episodes - episode_reward: 199.364 [199.364, 199.364] - loss: 9.293 - mae: 45.979 - mean_q: 60.046 Interval 297 (148000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1094 Interval 298 (148500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0114 2 episodes - episode_reward: 5.442 [-152.881, 163.766] - loss: 11.218 - mae: 45.645 - mean_q: 59.343 Interval 299 (149000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3439 1 episodes - episode_reward: 141.142 [141.142, 141.142] - loss: 7.909 - mae: 45.954 - mean_q: 59.987 Interval 300 (149500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3384 1 episodes - episode_reward: 271.083 [271.083, 271.083] - loss: 7.748 - mae: 45.462 - mean_q: 59.582 Interval 301 (150000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1439 2 episodes - episode_reward: -33.462 [-342.169, 275.245] - loss: 8.030 - mae: 45.670 - mean_q: 60.123 
Interval 302 (150500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7821 3 episodes - episode_reward: -134.795 [-165.630, -100.000] - loss: 8.460 - mae: 45.671 - mean_q: 60.165 Interval 303 (151000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7802 2 episodes - episode_reward: -168.117 [-177.671, -158.562] - loss: 6.110 - mae: 45.086 - mean_q: 59.071 Interval 304 (151500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0534 4 episodes - episode_reward: -28.351 [-251.786, 232.249] - loss: 8.915 - mae: 45.518 - mean_q: 59.457 Interval 305 (152000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2212 1 episodes - episode_reward: -124.380 [-124.380, -124.380] - loss: 9.898 - mae: 45.257 - mean_q: 58.991 Interval 306 (152500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3920 1 episodes - episode_reward: 227.405 [227.405, 227.405] - loss: 11.601 - mae: 45.035 - mean_q: 58.324 Interval 307 (153000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3387 2 episodes - episode_reward: -86.098 [-125.451, -46.746] - loss: 7.652 - mae: 45.168 - mean_q: 58.445 Interval 308 (153500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3437 1 episodes - episode_reward: -265.060 [-265.060, -265.060] - loss: 8.909 - mae: 45.064 - mean_q: 58.419 Interval 309 (154000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6357 1 episodes - episode_reward: 189.286 [189.286, 189.286] - loss: 7.774 - mae: 45.083 - mean_q: 58.852 Interval 310 (154500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5651 2 episodes - episode_reward: 257.594 [188.269, 326.919] - loss: 10.031 - mae: 45.054 - mean_q: 58.520 Interval 311 (155000 steps performed) 500/500 [==============================] 
- 3s 6ms/step - reward: 0.6222 1 episodes - episode_reward: 248.621 [248.621, 248.621] - loss: 8.941 - mae: 44.667 - mean_q: 58.092 Interval 312 (155500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8512 2 episodes - episode_reward: -241.228 [-281.753, -200.702] - loss: 12.346 - mae: 44.479 - mean_q: 57.434 Interval 313 (156000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1346 2 episodes - episode_reward: 41.362 [-132.961, 215.684] - loss: 10.919 - mae: 44.019 - mean_q: 56.831 Interval 314 (156500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7056 1 episodes - episode_reward: 177.031 [177.031, 177.031] - loss: 7.749 - mae: 43.409 - mean_q: 56.479 Interval 315 (157000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4950 2 episodes - episode_reward: 99.629 [-41.163, 240.420] - loss: 8.366 - mae: 43.816 - mean_q: 57.003 Interval 316 (157500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3050 2 episodes - episode_reward: 164.920 [15.903, 313.938] - loss: 9.624 - mae: 44.148 - mean_q: 57.116 Interval 317 (158000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.8176 1 episodes - episode_reward: 289.067 [289.067, 289.067] - loss: 10.008 - mae: 44.576 - mean_q: 57.481 Interval 318 (158500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0766 3 episodes - episode_reward: -0.292 [-211.178, 298.941] - loss: 9.179 - mae: 44.499 - mean_q: 57.198 Interval 319 (159000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3339 1 episodes - episode_reward: 222.135 [222.135, 222.135] - loss: 8.724 - mae: 44.631 - mean_q: 57.873 Interval 320 (159500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3433 2 episodes - episode_reward: 98.264 [-61.474, 258.001] - loss: 
8.424 - mae: 44.444 - mean_q: 57.582 Interval 321 (160000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1931 Interval 322 (160500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2608 1 episodes - episode_reward: 208.522 [208.522, 208.522] - loss: 9.720 - mae: 45.006 - mean_q: 57.831 Interval 323 (161000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2506 1 episodes - episode_reward: 102.711 [102.711, 102.711] - loss: 9.470 - mae: 44.570 - mean_q: 57.602 Interval 324 (161500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8058 2 episodes - episode_reward: 146.431 [67.932, 224.930] - loss: 8.209 - mae: 44.878 - mean_q: 58.020 Interval 325 (162000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2156 1 episodes - episode_reward: 284.640 [284.640, 284.640] - loss: 9.893 - mae: 44.602 - mean_q: 57.919 Interval 326 (162500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1639 Interval 327 (163000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4208 1 episodes - episode_reward: 174.050 [174.050, 174.050] - loss: 9.390 - mae: 44.978 - mean_q: 57.916 Interval 328 (163500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5019 3 episodes - episode_reward: 116.487 [-164.454, 307.870] - loss: 8.105 - mae: 44.858 - mean_q: 57.855 Interval 329 (164000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1792 1 episodes - episode_reward: 168.691 [168.691, 168.691] - loss: 9.835 - mae: 45.317 - mean_q: 58.223 Interval 330 (164500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5224 1 episodes - episode_reward: 59.417 [59.417, 59.417] - loss: 11.092 - mae: 45.496 - mean_q: 58.675 Interval 331 (165000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: 0.8726 2 episodes - episode_reward: 239.432 [229.338, 249.526] - loss: 9.334 - mae: 45.478 - mean_q: 58.911 Interval 332 (165500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5205 1 episodes - episode_reward: 284.683 [284.683, 284.683] - loss: 10.343 - mae: 45.526 - mean_q: 58.304 Interval 333 (166000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4180 1 episodes - episode_reward: 267.042 [267.042, 267.042] - loss: 11.349 - mae: 45.428 - mean_q: 58.403 Interval 334 (166500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3831 2 episodes - episode_reward: 83.558 [5.715, 161.401] - loss: 8.821 - mae: 45.619 - mean_q: 58.948 Interval 335 (167000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1062 2 episodes - episode_reward: 29.940 [-137.792, 197.671] - loss: 10.538 - mae: 45.362 - mean_q: 58.719 Interval 336 (167500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4034 1 episodes - episode_reward: 261.128 [261.128, 261.128] - loss: 7.718 - mae: 45.670 - mean_q: 59.099 Interval 337 (168000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6709 1 episodes - episode_reward: 176.974 [176.974, 176.974] - loss: 9.764 - mae: 46.310 - mean_q: 59.725 Interval 338 (168500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1396 2 episodes - episode_reward: 4.622 [-216.857, 226.101] - loss: 9.988 - mae: 46.135 - mean_q: 59.660 Interval 339 (169000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6929 1 episodes - episode_reward: 213.618 [213.618, 213.618] - loss: 11.693 - mae: 46.277 - mean_q: 60.156 Interval 340 (169500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4280 1 episodes - episode_reward: 
298.305 [298.305, 298.305] - loss: 7.936 - mae: 46.445 - mean_q: 60.018 Interval 341 (170000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3641 2 episodes - episode_reward: 63.043 [-73.657, 199.743] - loss: 9.320 - mae: 46.279 - mean_q: 59.740 Interval 342 (170500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3878 1 episodes - episode_reward: 247.004 [247.004, 247.004] - loss: 8.013 - mae: 46.367 - mean_q: 59.983 Interval 343 (171000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2789 1 episodes - episode_reward: 193.257 [193.257, 193.257] - loss: 6.953 - mae: 46.490 - mean_q: 60.086 Interval 344 (171500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2699 1 episodes - episode_reward: 81.826 [81.826, 81.826] - loss: 10.538 - mae: 46.483 - mean_q: 59.989 Interval 345 (172000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4020 1 episodes - episode_reward: 209.702 [209.702, 209.702] - loss: 7.954 - mae: 46.536 - mean_q: 60.301 Interval 346 (172500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5111 2 episodes - episode_reward: 25.977 [-136.949, 188.902] - loss: 8.229 - mae: 46.546 - mean_q: 60.396 Interval 347 (173000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5788 1 episodes - episode_reward: -34.819 [-34.819, -34.819] - loss: 10.218 - mae: 47.124 - mean_q: 61.273 Interval 348 (173500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7444 2 episodes - episode_reward: 235.622 [220.947, 250.298] - loss: 8.977 - mae: 47.219 - mean_q: 61.187 Interval 349 (174000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0991 2 episodes - episode_reward: 23.399 [5.218, 41.580] - loss: 7.586 - mae: 47.453 - mean_q: 61.483 Interval 350 (174500 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4283 1 episodes - episode_reward: 252.334 [252.334, 252.334] - loss: 8.559 - mae: 47.910 - mean_q: 62.356 Interval 351 (175000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1755 4 episodes - episode_reward: 0.402 [-105.882, 255.992] - loss: 12.250 - mae: 48.026 - mean_q: 62.002 Interval 352 (175500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8539 1 episodes - episode_reward: 267.670 [267.670, 267.670] - loss: 14.423 - mae: 48.472 - mean_q: 62.920 Interval 353 (176000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4589 3 episodes - episode_reward: 2.589 [-176.638, 312.534] - loss: 13.040 - mae: 47.833 - mean_q: 62.188 Interval 354 (176500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6249 2 episodes - episode_reward: 83.977 [-41.542, 209.496] - loss: 10.616 - mae: 47.886 - mean_q: 62.130 Interval 355 (177000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3877 3 episodes - episode_reward: 94.043 [-181.335, 243.584] - loss: 10.777 - mae: 47.994 - mean_q: 61.907 Interval 356 (177500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0311 3 episodes - episode_reward: 9.480 [-121.905, 248.508] - loss: 9.568 - mae: 47.935 - mean_q: 62.530 Interval 357 (178000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3465 2 episodes - episode_reward: 79.738 [-100.000, 259.476] - loss: 8.562 - mae: 47.946 - mean_q: 62.455 Interval 358 (178500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7414 1 episodes - episode_reward: 262.714 [262.714, 262.714] - loss: 9.469 - mae: 47.957 - mean_q: 62.633 Interval 359 (179000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7182 2 episodes - 
episode_reward: 247.755 [242.221, 253.289] - loss: 11.935 - mae: 47.722 - mean_q: 62.199 Interval 360 (179500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4345 1 episodes - episode_reward: 218.812 [218.812, 218.812] - loss: 9.594 - mae: 48.084 - mean_q: 62.656 Interval 361 (180000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5425 4 episodes - episode_reward: -108.755 [-146.312, -90.910] - loss: 8.586 - mae: 48.051 - mean_q: 62.598 Interval 362 (180500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3986 1 episodes - episode_reward: 287.042 [287.042, 287.042] - loss: 10.759 - mae: 48.268 - mean_q: 62.481 Interval 363 (181000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6731 1 episodes - episode_reward: 277.317 [277.317, 277.317] - loss: 8.603 - mae: 48.428 - mean_q: 62.855 Interval 364 (181500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2544 2 episodes - episode_reward: 45.070 [-191.908, 282.049] - loss: 9.678 - mae: 48.182 - mean_q: 62.605 Interval 365 (182000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5394 2 episodes - episode_reward: 254.158 [225.051, 283.266] - loss: 9.639 - mae: 48.407 - mean_q: 62.837 Interval 366 (182500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0176 1 episodes - episode_reward: -188.702 [-188.702, -188.702] - loss: 7.449 - mae: 48.842 - mean_q: 63.523 Interval 367 (183000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6299 3 episodes - episode_reward: 149.292 [-91.334, 302.230] - loss: 11.457 - mae: 48.426 - mean_q: 62.466 Interval 368 (183500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3103 2 episodes - episode_reward: 61.708 [-148.904, 272.320] - loss: 10.509 - mae: 48.238 - mean_q: 62.444 
Interval 369 (184000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2335 Interval 370 (184500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0211 2 episodes - episode_reward: 74.201 [-100.000, 248.402] - loss: 12.597 - mae: 48.181 - mean_q: 62.204 Interval 371 (185000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2799 1 episodes - episode_reward: 164.698 [164.698, 164.698] - loss: 8.274 - mae: 47.883 - mean_q: 61.477 Interval 372 (185500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0762 Interval 373 (186000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5547 2 episodes - episode_reward: 159.919 [122.921, 196.917] - loss: 7.909 - mae: 48.251 - mean_q: 61.793 Interval 374 (186500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4985 1 episodes - episode_reward: 216.587 [216.587, 216.587] - loss: 11.066 - mae: 48.319 - mean_q: 62.176 Interval 375 (187000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0774 2 episodes - episode_reward: -55.315 [-100.000, -10.630] - loss: 9.928 - mae: 48.338 - mean_q: 62.303 Interval 376 (187500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0350 Interval 377 (188000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7621 2 episodes - episode_reward: 232.237 [221.778, 242.696] - loss: 8.830 - mae: 48.588 - mean_q: 62.413 Interval 378 (188500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2465 1 episodes - episode_reward: 201.612 [201.612, 201.612] - loss: 12.527 - mae: 48.254 - mean_q: 62.098 Interval 379 (189000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1602 1 episodes - episode_reward: -86.415 [-86.415, -86.415] - loss: 9.158 - mae: 
48.377 - mean_q: 62.572 Interval 380 (189500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4848 1 episodes - episode_reward: 237.780 [237.780, 237.780] - loss: 7.336 - mae: 48.377 - mean_q: 62.125 Interval 381 (190000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2317 Interval 382 (190500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4077 1 episodes - episode_reward: 210.898 [210.898, 210.898] - loss: 8.668 - mae: 48.584 - mean_q: 62.226 Interval 383 (191000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3336 1 episodes - episode_reward: 203.228 [203.228, 203.228] - loss: 8.648 - mae: 48.333 - mean_q: 62.255 Interval 384 (191500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 6.5748e-04 Interval 385 (192000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1205 1 episodes - episode_reward: 156.431 [156.431, 156.431] - loss: 7.569 - mae: 48.196 - mean_q: 61.670 Interval 386 (192500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3748 Interval 387 (193000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.7124 2 episodes - episode_reward: 229.517 [201.016, 258.018] - loss: 9.427 - mae: 48.371 - mean_q: 62.232 Interval 388 (193500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0741 Interval 389 (194000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0949 2 episodes - episode_reward: 69.563 [-93.802, 232.929] - loss: 8.200 - mae: 48.432 - mean_q: 62.318 Interval 390 (194500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4050 1 episodes - episode_reward: 199.977 [199.977, 199.977] - loss: 8.345 - mae: 48.046 - mean_q: 61.523 Interval 391 (195000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: 0.4907 1 episodes - episode_reward: 250.929 [250.929, 250.929] - loss: 10.237 - mae: 47.941 - mean_q: 62.083 Interval 392 (195500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5732 1 episodes - episode_reward: 190.083 [190.083, 190.083] - loss: 8.730 - mae: 47.762 - mean_q: 61.756 Interval 393 (196000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5309 3 episodes - episode_reward: 122.040 [-87.637, 259.318] - loss: 10.991 - mae: 48.012 - mean_q: 61.988 Interval 394 (196500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2323 Interval 395 (197000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1568 2 episodes - episode_reward: 81.281 [-60.710, 223.272] - loss: 8.739 - mae: 48.525 - mean_q: 62.721 Interval 396 (197500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0749 Interval 397 (198000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1987 1 episodes - episode_reward: 156.133 [156.133, 156.133] - loss: 10.332 - mae: 48.341 - mean_q: 62.400 Interval 398 (198500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6689 1 episodes - episode_reward: 153.036 [153.036, 153.036] - loss: 10.028 - mae: 48.497 - mean_q: 62.710 Interval 399 (199000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3669 4 episodes - episode_reward: 83.027 [-106.597, 293.098] - loss: 10.041 - mae: 48.677 - mean_q: 62.840 Interval 400 (199500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0114 1 episodes - episode_reward: -10.982 [-10.982, -10.982] - loss: 10.161 - mae: 48.414 - mean_q: 62.804 Interval 401 (200000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2538 1 episodes - 
episode_reward: 165.825 [165.825, 165.825] - loss: 8.784 - mae: 48.386 - mean_q: 62.460 Interval 402 (200500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0730 1 episodes - episode_reward: 122.731 [122.731, 122.731] - loss: 8.686 - mae: 48.499 - mean_q: 63.059 Interval 403 (201000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2426 Interval 404 (201500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0201 Interval 405 (202000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2222 3 episodes - episode_reward: -27.573 [-269.514, 142.041] - loss: 9.041 - mae: 47.903 - mean_q: 62.062 Interval 406 (202500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1965 1 episodes - episode_reward: 9.111 [9.111, 9.111] - loss: 8.525 - mae: 47.490 - mean_q: 61.423 Interval 407 (203000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0154 3 episodes - episode_reward: 5.349 [-105.854, 227.465] - loss: 7.863 - mae: 47.640 - mean_q: 61.621 Interval 408 (203500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 1.0106 2 episodes - episode_reward: 266.196 [194.914, 337.478] - loss: 9.953 - mae: 47.562 - mean_q: 61.292 Interval 409 (204000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0054 Interval 410 (204500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1558 1 episodes - episode_reward: 166.685 [166.685, 166.685] - loss: 8.452 - mae: 46.770 - mean_q: 60.370 Interval 411 (205000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2581 3 episodes - episode_reward: -79.499 [-178.411, -14.131] - loss: 8.652 - mae: 46.662 - mean_q: 60.356 Interval 412 (205500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 
0.1527 Interval 413 (206000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2041 1 episodes - episode_reward: 262.832 [262.832, 262.832] - loss: 10.880 - mae: 46.698 - mean_q: 60.731 Interval 414 (206500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4730 3 episodes - episode_reward: 86.298 [-100.000, 226.285] - loss: 9.302 - mae: 46.465 - mean_q: 60.188 Interval 415 (207000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5910 3 episodes - episode_reward: -104.011 [-131.103, -58.863] - loss: 8.998 - mae: 46.340 - mean_q: 60.105 Interval 416 (207500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1432 Interval 417 (208000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5455 1 episodes - episode_reward: 192.873 [192.873, 192.873] - loss: 10.164 - mae: 45.741 - mean_q: 59.648 Interval 418 (208500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7139 3 episodes - episode_reward: 165.617 [14.364, 249.097] - loss: 10.422 - mae: 45.507 - mean_q: 59.080 Interval 419 (209000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0838 Interval 420 (209500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.8451 2 episodes - episode_reward: 192.743 [150.057, 235.428] - loss: 12.067 - mae: 45.481 - mean_q: 58.812 Interval 421 (210000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5351 1 episodes - episode_reward: 247.097 [247.097, 247.097] - loss: 9.434 - mae: 45.479 - mean_q: 58.975 Interval 422 (210500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0298 2 episodes - episode_reward: 48.408 [-110.899, 207.715] - loss: 7.969 - mae: 45.318 - mean_q: 58.733 Interval 423 (211000 steps performed) 500/500 [==============================] 
- 4s 7ms/step - reward: 0.1741 1 episodes - episode_reward: 109.587 [109.587, 109.587] - loss: 11.939 - mae: 45.328 - mean_q: 58.748 Interval 424 (211500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5621 1 episodes - episode_reward: 212.599 [212.599, 212.599] - loss: 8.996 - mae: 45.493 - mean_q: 59.004 Interval 425 (212000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4386 2 episodes - episode_reward: 82.891 [-100.000, 265.782] - loss: 8.464 - mae: 45.132 - mean_q: 58.821 Interval 426 (212500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3734 1 episodes - episode_reward: 251.988 [251.988, 251.988] - loss: 8.292 - mae: 45.409 - mean_q: 59.199 Interval 427 (213000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5261 5 episodes - episode_reward: -45.576 [-186.084, 201.751] - loss: 8.934 - mae: 44.867 - mean_q: 58.551 Interval 428 (213500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5856 2 episodes - episode_reward: -143.435 [-276.612, -10.258] - loss: 6.718 - mae: 45.046 - mean_q: 59.014 Interval 429 (214000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4723 1 episodes - episode_reward: 187.252 [187.252, 187.252] - loss: 11.418 - mae: 45.023 - mean_q: 58.857 Interval 430 (214500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1721 1 episodes - episode_reward: 211.339 [211.339, 211.339] - loss: 8.427 - mae: 44.736 - mean_q: 58.317 Interval 431 (215000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2946 Interval 432 (215500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.8495 2 episodes - episode_reward: 221.237 [189.354, 253.120] - loss: 9.193 - mae: 45.047 - mean_q: 58.364 Interval 433 (216000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: 0.3714 1 episodes - episode_reward: 224.856 [224.856, 224.856] - loss: 7.442 - mae: 45.289 - mean_q: 58.807 Interval 434 (216500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.8935 2 episodes - episode_reward: 237.437 [193.640, 281.234] - loss: 8.641 - mae: 45.570 - mean_q: 59.135 Interval 435 (217000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2564 Interval 436 (217500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.7531 2 episodes - episode_reward: 234.532 [232.259, 236.805] - loss: 9.529 - mae: 45.909 - mean_q: 59.603 Interval 437 (218000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4788 1 episodes - episode_reward: 231.960 [231.960, 231.960] - loss: 8.147 - mae: 46.052 - mean_q: 59.944 Interval 438 (218500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1237 2 episodes - episode_reward: 24.860 [-148.486, 198.206] - loss: 7.919 - mae: 46.401 - mean_q: 60.501 Interval 439 (219000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0063 Interval 440 (219500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0345 Interval 441 (220000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1638 1 episodes - episode_reward: 132.967 [132.967, 132.967] - loss: 6.987 - mae: 46.448 - mean_q: 60.435 Interval 442 (220500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.8207 1 episodes - episode_reward: 251.391 [251.391, 251.391] - loss: 7.804 - mae: 46.642 - mean_q: 60.829 Interval 443 (221000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0995 2 episodes - episode_reward: 75.043 [-111.169, 261.255] - loss: 8.576 - mae: 46.847 - mean_q: 60.694 Interval 444 (221500 steps 
performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2852 2 episodes - episode_reward: 78.436 [-102.183, 259.055] - loss: 9.286 - mae: 46.958 - mean_q: 60.641 Interval 445 (222000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5674 1 episodes - episode_reward: 251.453 [251.453, 251.453] - loss: 8.382 - mae: 47.322 - mean_q: 61.012 Interval 446 (222500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2671 1 episodes - episode_reward: 179.250 [179.250, 179.250] - loss: 8.279 - mae: 46.741 - mean_q: 60.535 Interval 447 (223000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.7921 2 episodes - episode_reward: 201.713 [175.019, 228.408] - loss: 9.100 - mae: 46.886 - mean_q: 61.133 Interval 448 (223500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4867 3 episodes - episode_reward: -84.174 [-160.451, 24.404] - loss: 8.883 - mae: 46.993 - mean_q: 60.549 Interval 449 (224000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0437 Interval 450 (224500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0295 Interval 451 (225000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0323 Interval 452 (225500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0332 Interval 453 (226000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0390 Interval 454 (226500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0324 Interval 455 (227000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0650 Interval 456 (227500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1110 1 episodes - episode_reward: 27.644 [27.644, 27.644] - loss: 7.345 - mae: 44.722 
- mean_q: 57.674 Interval 457 (228000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4243 1 episodes - episode_reward: 187.724 [187.724, 187.724] - loss: 7.096 - mae: 44.634 - mean_q: 58.030 Interval 458 (228500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1205 3 episodes - episode_reward: 16.703 [-114.378, 264.486] - loss: 6.881 - mae: 44.508 - mean_q: 57.266 Interval 459 (229000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2268 Interval 460 (229500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3841 1 episodes - episode_reward: 206.154 [206.154, 206.154] - loss: 7.013 - mae: 44.236 - mean_q: 57.138 Interval 461 (230000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0341 Interval 462 (230500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0294 Interval 463 (231000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0317 Interval 464 (231500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0330 Interval 465 (232000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.5434 1 episodes - episode_reward: 126.659 [126.659, 126.659] - loss: 5.963 - mae: 43.541 - mean_q: 56.717 Interval 466 (232500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0894 3 episodes - episode_reward: 1.184 [-158.912, 275.965] - loss: 9.161 - mae: 43.487 - mean_q: 56.601 Interval 467 (233000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3839 1 episodes - episode_reward: 240.798 [240.798, 240.798] - loss: 7.407 - mae: 43.017 - mean_q: 56.114 Interval 468 (233500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0988 Interval 469 (234000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: 0.0917 3 episodes - episode_reward: 24.556 [-151.042, 184.135] - loss: 5.304 - mae: 43.136 - mean_q: 56.526 Interval 470 (234500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3435 1 episodes - episode_reward: 171.064 [171.064, 171.064] - loss: 10.036 - mae: 42.869 - mean_q: 56.136 Interval 471 (235000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5572 1 episodes - episode_reward: 262.319 [262.319, 262.319] - loss: 9.158 - mae: 42.786 - mean_q: 55.949 Interval 472 (235500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0343 1 episodes - episode_reward: -12.504 [-12.504, -12.504] - loss: 7.363 - mae: 43.086 - mean_q: 56.395 Interval 473 (236000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7491 4 episodes - episode_reward: -74.622 [-155.166, 95.563] - loss: 5.901 - mae: 43.547 - mean_q: 57.170 Interval 474 (236500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4711 1 episodes - episode_reward: 234.512 [234.512, 234.512] - loss: 9.917 - mae: 43.317 - mean_q: 56.716 Interval 475 (237000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3870 3 episodes - episode_reward: 20.048 [-101.641, 203.202] - loss: 10.062 - mae: 42.784 - mean_q: 55.406 Interval 476 (237500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6029 1 episodes - episode_reward: 254.140 [254.140, 254.140] - loss: 8.663 - mae: 43.156 - mean_q: 56.184 Interval 477 (238000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5800 1 episodes - episode_reward: 289.574 [289.574, 289.574] - loss: 7.411 - mae: 43.593 - mean_q: 56.552 Interval 478 (238500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6768 2 episodes - episode_reward: 
224.304 [221.034, 227.573] - loss: 7.441 - mae: 43.679 - mean_q: 56.729 Interval 479 (239000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1274 Interval 480 (239500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0305 Interval 481 (240000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0189 Interval 482 (240500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0299 Interval 483 (241000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0266 Interval 484 (241500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0232 Interval 485 (242000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0422 Interval 486 (242500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0265 Interval 487 (243000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.0388 Interval 488 (243500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.0359 Interval 489 (244000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.0355 Interval 490 (244500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.0517 Interval 491 (245000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.0438 Interval 492 (245500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3279 2 episodes - episode_reward: -25.498 [-30.116, -20.879] - loss: 6.774 - mae: 39.714 - mean_q: 51.821 Interval 493 (246000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1045 2 episodes - episode_reward: -65.459 [-141.174, 10.255] - loss: 6.710 - mae: 39.937 - mean_q: 52.119 Interval 494 (246500 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: 0.1776 1 episodes - episode_reward: 278.976 [278.976, 278.976] - loss: 7.650 - mae: 39.361 - mean_q: 51.243 Interval 495 (247000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2960 Interval 496 (247500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4881 1 episodes - episode_reward: 216.001 [216.001, 216.001] - loss: 7.536 - mae: 39.413 - mean_q: 51.203 Interval 497 (248000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1393 2 episodes - episode_reward: 122.102 [-7.086, 251.289] - loss: 7.289 - mae: 39.343 - mean_q: 51.085 Interval 498 (248500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0767 1 episodes - episode_reward: -98.574 [-98.574, -98.574] - loss: 9.157 - mae: 39.207 - mean_q: 50.988 Interval 499 (249000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.7080 1 episodes - episode_reward: 232.464 [232.464, 232.464] - loss: 6.707 - mae: 39.220 - mean_q: 50.888 Interval 500 (249500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5178 done, took 1772.135 seconds
# Keep a reference to this run's weights and agent for later comparison.
weights.append('dqn_lunar_weights_three.h5f')  # dropped no-op f-string prefix (no placeholders)
# NOTE(review): `models` is initialised as a dict near the top of the notebook;
# `.append` implies it was rebound to a list before this cell — confirm.
models.append(dqn)
models[-1].load_weights(weights[-1])

# Plot per-episode reward: raw values in light gray, 50-episode rolling mean in black.
df = pd.DataFrame(history.history)
ax = df['episode_reward'].plot(color = 'lightgray')
df['episode_reward'].rolling(50).mean().plot(color = 'black')
ax.set_xlabel("Episode")
# Fixed label: it previously said "(10)" but the rolling window above is 50.
plt.ylabel("Rolling Mean (50) Cumulative Return")
plt.show()
Performance has improved significantly. Although the mean reward is still hovering around zero, we are seeing far fewer of the large negative episode rewards that appeared in the previous models, and some positive episode rewards are now being recorded.
# Record Model 4's configuration in the RL results table.
# Columns (inferred from surrounding cells — confirm against rl's index):
# window length, hidden-layer widths, training steps, memory limit, log interval, notes.
rl['Model 4'] = [6, '128/64/32', 250000, 50000, 500, None]
Here I have increased window size again, keeping the same memory and step limits.
# Q-network for the DQN agent: flatten the stacked observation window,
# pass it through three ReLU hidden layers tapering 128 -> 64 -> 32,
# and finish with a linear head holding one unit per discrete action.
model = Sequential()
# Window length is now 6, so the input stacks 6 observations.
model.add(Flatten(input_shape=(6,) + env.observation_space.shape))
for width in (128, 64, 32):
    model.add(Dense(width))
    model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
Model: "sequential_40" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_39 (Flatten) (None, 48) 0 _________________________________________________________________ dense_156 (Dense) (None, 128) 6272 _________________________________________________________________ activation_156 (Activation) (None, 128) 0 _________________________________________________________________ dense_157 (Dense) (None, 64) 8256 _________________________________________________________________ activation_157 (Activation) (None, 64) 0 _________________________________________________________________ dense_158 (Dense) (None, 32) 2080 _________________________________________________________________ activation_158 (Activation) (None, 32) 0 _________________________________________________________________ dense_159 (Dense) (None, 4) 132 _________________________________________________________________ activation_159 (Activation) (None, 4) 0 ================================================================= Total params: 16,740 Trainable params: 16,740 Non-trainable params: 0 _________________________________________________________________ None
# Assemble the DQN agent for Model 4: 50k-transition replay buffer with the
# enlarged window length of 6, epsilon-greedy exploration, a short warm-up,
# and soft target-network updates (tau = 1e-2).
memory = SequentialMemory(limit=50000, window_length=6)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=30,
target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Checkpoint weights every 250k steps (i.e. once, at the end of this run)
# and log training metrics to JSON every 100 intervals.
weights_filename = 'dqn_lunar_weights_four.h5f'  # fixed: dropped no-op f-string prefix
checkpoint_weights_filename = 'dqn_lunar_weights_{step}.h5f'
log_filename = 'dqn_lunar_log.json'  # fixed: dropped no-op f-string prefix
callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
callbacks += [FileLogger(log_filename, interval=100)]

# Time the 250k-step training run so it can be reported alongside the results.
start_time = time.time()
history = dqn.fit(env, callbacks=callbacks, nb_steps=250000, log_interval=500)
end_time = time.time()
Training for 250000 steps ... Interval 1 (0 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2470 4 episodes - episode_reward: -163.342 [-230.525, -100.000] - loss: 51.265 - mae: 2.554 - mean_q: 1.008 Interval 2 (500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8216 3 episodes - episode_reward: -89.515 [-297.567, 178.719] - loss: 21.909 - mae: 3.750 - mean_q: 5.078 Interval 3 (1000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7107 4 episodes - episode_reward: -361.254 [-610.655, -212.364] - loss: 23.944 - mae: 7.009 - mean_q: 6.891 Interval 4 (1500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0163 Interval 5 (2000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5665 3 episodes - episode_reward: -108.893 [-259.213, 58.571] - loss: 12.971 - mae: 13.062 - mean_q: 10.156 Interval 6 (2500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1934 2 episodes - episode_reward: -263.924 [-366.131, -161.717] - loss: 12.006 - mae: 15.181 - mean_q: 10.272 Interval 7 (3000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0265 1 episodes - episode_reward: -88.025 [-88.025, -88.025] - loss: 9.384 - mae: 16.935 - mean_q: 10.641 Interval 8 (3500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6696 4 episodes - episode_reward: -184.618 [-382.822, -78.430] - loss: 11.381 - mae: 17.962 - mean_q: 10.930 Interval 9 (4000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1761 1 episodes - episode_reward: -102.566 [-102.566, -102.566] - loss: 9.476 - mae: 19.035 - mean_q: 10.275 Interval 10 (4500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1509 2 episodes - episode_reward: 25.683 [-98.552, 149.919] - loss: 10.995 - mae: 
20.347 - mean_q: 9.987 Interval 11 (5000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2571 1 episodes - episode_reward: 162.281 [162.281, 162.281] - loss: 11.518 - mae: 20.355 - mean_q: 10.621 Interval 12 (5500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5160 2 episodes - episode_reward: -153.999 [-210.083, -97.916] - loss: 10.997 - mae: 20.959 - mean_q: 12.013 Interval 13 (6000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8938 3 episodes - episode_reward: -144.312 [-241.641, -85.761] - loss: 10.335 - mae: 21.686 - mean_q: 13.132 Interval 14 (6500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0076 Interval 15 (7000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1736 Interval 16 (7500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9646 3 episodes - episode_reward: -174.911 [-304.702, -105.582] - loss: 9.579 - mae: 23.077 - mean_q: 12.835 Interval 17 (8000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0671 4 episodes - episode_reward: -131.414 [-168.438, -112.278] - loss: 9.747 - mae: 23.507 - mean_q: 12.139 Interval 18 (8500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0521 1 episodes - episode_reward: -162.606 [-162.606, -162.606] - loss: 8.545 - mae: 24.425 - mean_q: 11.918 Interval 19 (9000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0262 3 episodes - episode_reward: 23.643 [-110.864, 240.154] - loss: 9.638 - mae: 25.338 - mean_q: 13.745 Interval 20 (9500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2831 1 episodes - episode_reward: -72.630 [-72.630, -72.630] - loss: 7.501 - mae: 26.020 - mean_q: 15.023 Interval 21 (10000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -0.1161 Interval 22 (10500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1237 Interval 23 (11000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1737 Interval 24 (11500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1823 Interval 25 (12000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1533 Interval 26 (12500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1565 Interval 27 (13000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1720 Interval 28 (13500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0321 Interval 29 (14000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3525 2 episodes - episode_reward: -214.822 [-435.427, 5.782] - loss: 6.585 - mae: 28.571 - mean_q: 25.410 Interval 30 (14500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1605 Interval 31 (15000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0970 Interval 32 (15500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1617 Interval 33 (16000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1764 Interval 34 (16500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1474 Interval 35 (17000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1742 Interval 36 (17500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0799 Interval 37 (18000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1421 3 episodes - episode_reward: -142.362 [-348.836, 24.921] - loss: 8.206 - mae: 33.375 - 
mean_q: 35.396 Interval 38 (18500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2904 1 episodes - episode_reward: 182.662 [182.662, 182.662] - loss: 8.185 - mae: 34.190 - mean_q: 37.210 Interval 39 (19000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.0202 2 episodes - episode_reward: -64.090 [-121.664, -6.516] - loss: 10.839 - mae: 34.994 - mean_q: 38.257 Interval 40 (19500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4365 3 episodes - episode_reward: -34.276 [-92.621, 66.217] - loss: 8.818 - mae: 36.028 - mean_q: 39.337 Interval 41 (20000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4319 2 episodes - episode_reward: -96.682 [-111.868, -81.496] - loss: 6.928 - mae: 37.004 - mean_q: 41.393 Interval 42 (20500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0987 Interval 43 (21000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7673 3 episodes - episode_reward: -117.011 [-154.306, -96.725] - loss: 8.984 - mae: 38.175 - mean_q: 42.630 Interval 44 (21500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.2546 3 episodes - episode_reward: -77.906 [-189.558, -4.500] - loss: 8.919 - mae: 38.553 - mean_q: 43.591 Interval 45 (22000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4731 2 episodes - episode_reward: -103.018 [-141.418, -64.617] - loss: 11.946 - mae: 39.004 - mean_q: 45.125 Interval 46 (22500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0461 3 episodes - episode_reward: -8.046 [-134.222, 209.308] - loss: 18.651 - mae: 39.442 - mean_q: 45.815 Interval 47 (23000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0483 3 episodes - episode_reward: 0.182 [-122.287, 240.309] - loss: 12.767 - mae: 
39.971 - mean_q: 46.699 Interval 48 (23500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2979 1 episodes - episode_reward: -217.289 [-217.289, -217.289] - loss: 10.358 - mae: 40.497 - mean_q: 47.631 Interval 49 (24000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0275 Interval 50 (24500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1134 Interval 51 (25000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1657 1 episodes - episode_reward: 75.596 [75.596, 75.596] - loss: 13.337 - mae: 42.271 - mean_q: 51.752 Interval 52 (25500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2814 Interval 53 (26000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3986 1 episodes - episode_reward: 254.182 [254.182, 254.182] - loss: 16.990 - mae: 43.175 - mean_q: 53.724 Interval 54 (26500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0155 3 episodes - episode_reward: 37.752 [-117.863, 216.919] - loss: 12.590 - mae: 43.590 - mean_q: 54.039 Interval 55 (27000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3731 2 episodes - episode_reward: -80.194 [-146.944, -13.444] - loss: 10.722 - mae: 44.037 - mean_q: 55.255 Interval 56 (27500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3508 1 episodes - episode_reward: -162.925 [-162.925, -162.925] - loss: 13.938 - mae: 44.393 - mean_q: 55.585 Interval 57 (28000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7556 4 episodes - episode_reward: -114.374 [-200.253, -34.480] - loss: 8.547 - mae: 44.878 - mean_q: 56.060 Interval 58 (28500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1029 Interval 59 (29000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.4901 1 episodes - episode_reward: 195.839 [195.839, 195.839] - loss: 10.167 - mae: 45.547 - mean_q: 57.056 Interval 60 (29500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2990 1 episodes - episode_reward: 210.159 [210.159, 210.159] - loss: 9.186 - mae: 45.688 - mean_q: 57.256 Interval 61 (30000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0559 Interval 62 (30500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2201 1 episodes - episode_reward: 184.519 [184.519, 184.519] - loss: 9.413 - mae: 45.905 - mean_q: 57.237 Interval 63 (31000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3331 1 episodes - episode_reward: 129.221 [129.221, 129.221] - loss: 8.679 - mae: 46.420 - mean_q: 58.019 Interval 64 (31500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3217 1 episodes - episode_reward: 214.986 [214.986, 214.986] - loss: 7.584 - mae: 46.092 - mean_q: 57.306 Interval 65 (32000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1675 Interval 66 (32500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0557 Interval 67 (33000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4292 2 episodes - episode_reward: 77.897 [3.862, 151.932] - loss: 8.304 - mae: 46.459 - mean_q: 58.057 Interval 68 (33500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0790 Interval 69 (34000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2078 2 episodes - episode_reward: 17.922 [-126.697, 162.540] - loss: 8.860 - mae: 45.900 - mean_q: 56.942 Interval 70 (34500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5504 1 episodes - episode_reward: 299.225 
[299.225, 299.225] - loss: 8.117 - mae: 46.244 - mean_q: 57.728 Interval 71 (35000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1457 Interval 72 (35500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0539 Interval 73 (36000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0316 2 episodes - episode_reward: -5.982 [-116.968, 105.003] - loss: 8.917 - mae: 45.112 - mean_q: 56.414 Interval 74 (36500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1433 3 episodes - episode_reward: -7.852 [-108.875, 172.723] - loss: 10.740 - mae: 45.074 - mean_q: 56.217 Interval 75 (37000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2549 1 episodes - episode_reward: -262.673 [-262.673, -262.673] - loss: 8.954 - mae: 45.133 - mean_q: 56.554 Interval 76 (37500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0448 2 episodes - episode_reward: 60.880 [-90.884, 212.644] - loss: 9.898 - mae: 45.025 - mean_q: 56.550 Interval 77 (38000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8522 2 episodes - episode_reward: -217.908 [-314.667, -121.149] - loss: 11.251 - mae: 44.790 - mean_q: 55.959 Interval 78 (38500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0095 Interval 79 (39000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1522 1 episodes - episode_reward: 151.514 [151.514, 151.514] - loss: 10.498 - mae: 44.715 - mean_q: 55.766 Interval 80 (39500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6439 1 episodes - episode_reward: 192.366 [192.366, 192.366] - loss: 15.064 - mae: 44.730 - mean_q: 55.842 Interval 81 (40000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1232 Interval 82 (40500 steps 
performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0141 1 episodes - episode_reward: -70.840 [-70.840, -70.840] - loss: 7.049 - mae: 44.472 - mean_q: 55.891 Interval 83 (41000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2222 1 episodes - episode_reward: 194.922 [194.922, 194.922] - loss: 8.525 - mae: 44.757 - mean_q: 56.715 Interval 84 (41500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2128 1 episodes - episode_reward: -141.529 [-141.529, -141.529] - loss: 9.826 - mae: 44.682 - mean_q: 56.353 Interval 85 (42000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2639 1 episodes - episode_reward: 115.842 [115.842, 115.842] - loss: 10.230 - mae: 44.753 - mean_q: 56.323 Interval 86 (42500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2872 1 episodes - episode_reward: 193.699 [193.699, 193.699] - loss: 11.633 - mae: 44.233 - mean_q: 55.650 Interval 87 (43000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0095 1 episodes - episode_reward: -109.555 [-109.555, -109.555] - loss: 10.649 - mae: 43.908 - mean_q: 55.370 Interval 88 (43500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2573 1 episodes - episode_reward: 222.118 [222.118, 222.118] - loss: 11.251 - mae: 44.205 - mean_q: 55.575 Interval 89 (44000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1552 1 episodes - episode_reward: -132.115 [-132.115, -132.115] - loss: 12.659 - mae: 44.065 - mean_q: 55.145 Interval 90 (44500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2218 Interval 91 (45000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2896 1 episodes - episode_reward: 98.019 [98.019, 98.019] - loss: 10.698 - mae: 43.979 - mean_q: 55.432 Interval 92 (45500 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4267 1 episodes - episode_reward: 221.169 [221.169, 221.169] - loss: 10.054 - mae: 44.083 - mean_q: 55.885 Interval 93 (46000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0109 Interval 94 (46500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1518 Interval 95 (47000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0048 Interval 96 (47500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3165 2 episodes - episode_reward: -87.025 [-143.924, -30.125] - loss: 9.748 - mae: 43.362 - mean_q: 54.673 Interval 97 (48000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2220 Interval 98 (48500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4016 1 episodes - episode_reward: 153.534 [153.534, 153.534] - loss: 10.124 - mae: 43.526 - mean_q: 55.136 Interval 99 (49000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0898 Interval 100 (49500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0879 2 episodes - episode_reward: 20.036 [-217.985, 258.056] - loss: 9.127 - mae: 43.185 - mean_q: 54.872 Interval 101 (50000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0028 2 episodes - episode_reward: 19.556 [-121.425, 160.537] - loss: 11.037 - mae: 42.738 - mean_q: 54.878 Interval 102 (50500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0589 Interval 103 (51000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1801 1 episodes - episode_reward: 88.819 [88.819, 88.819] - loss: 5.661 - mae: 42.653 - mean_q: 54.883 Interval 104 (51500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
0.2052 2 episodes - episode_reward: 23.782 [-110.825, 158.389] - loss: 9.967 - mae: 42.429 - mean_q: 54.507 Interval 105 (52000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1328 Interval 106 (52500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0644 1 episodes - episode_reward: 135.813 [135.813, 135.813] - loss: 7.902 - mae: 42.018 - mean_q: 54.238 Interval 107 (53000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0514 1 episodes - episode_reward: 13.110 [13.110, 13.110] - loss: 11.700 - mae: 41.776 - mean_q: 54.180 Interval 108 (53500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8046 2 episodes - episode_reward: -234.123 [-307.424, -160.823] - loss: 7.290 - mae: 41.879 - mean_q: 54.241 Interval 109 (54000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0847 Interval 110 (54500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4434 1 episodes - episode_reward: 164.120 [164.120, 164.120] - loss: 7.539 - mae: 41.776 - mean_q: 54.441 Interval 111 (55000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0175 2 episodes - episode_reward: 51.973 [-132.588, 236.535] - loss: 7.719 - mae: 41.759 - mean_q: 54.198 Interval 112 (55500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1997 3 episodes - episode_reward: -175.027 [-277.257, -34.069] - loss: 7.809 - mae: 41.858 - mean_q: 54.315 Interval 113 (56000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0067 1 episodes - episode_reward: -100.175 [-100.175, -100.175] - loss: 8.544 - mae: 41.862 - mean_q: 54.253 Interval 114 (56500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3267 1 episodes - episode_reward: 146.711 [146.711, 146.711] - loss: 13.408 - mae: 42.164 
- mean_q: 54.503 Interval 115 (57000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3207 1 episodes - episode_reward: 213.894 [213.894, 213.894] - loss: 10.247 - mae: 41.670 - mean_q: 53.769 Interval 116 (57500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1819 2 episodes - episode_reward: -30.541 [-184.879, 123.797] - loss: 8.926 - mae: 41.593 - mean_q: 53.234 Interval 117 (58000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3577 1 episodes - episode_reward: 158.394 [158.394, 158.394] - loss: 8.689 - mae: 41.400 - mean_q: 53.629 Interval 118 (58500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6033 2 episodes - episode_reward: -190.270 [-240.684, -139.857] - loss: 8.053 - mae: 41.160 - mean_q: 53.420 Interval 119 (59000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1589 Interval 120 (59500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2652 1 episodes - episode_reward: 153.436 [153.436, 153.436] - loss: 11.111 - mae: 41.062 - mean_q: 53.175 Interval 121 (60000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1890 1 episodes - episode_reward: -117.316 [-117.316, -117.316] - loss: 6.438 - mae: 41.234 - mean_q: 53.478 Interval 122 (60500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4384 1 episodes - episode_reward: 109.887 [109.887, 109.887] - loss: 10.701 - mae: 41.104 - mean_q: 52.914 Interval 123 (61000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2190 1 episodes - episode_reward: 235.087 [235.087, 235.087] - loss: 8.641 - mae: 41.367 - mean_q: 53.282 Interval 124 (61500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0658 Interval 125 (62000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.1479 Interval 126 (62500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0316 Interval 127 (63000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4392 1 episodes - episode_reward: -33.591 [-33.591, -33.591] - loss: 8.017 - mae: 41.211 - mean_q: 52.659 Interval 128 (63500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2779 1 episodes - episode_reward: 219.066 [219.066, 219.066] - loss: 8.768 - mae: 41.301 - mean_q: 53.135 Interval 129 (64000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3329 1 episodes - episode_reward: 163.465 [163.465, 163.465] - loss: 8.040 - mae: 41.305 - mean_q: 53.204 Interval 130 (64500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2079 Interval 131 (65000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6029 1 episodes - episode_reward: 172.408 [172.408, 172.408] - loss: 7.955 - mae: 41.690 - mean_q: 53.570 Interval 132 (65500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4123 1 episodes - episode_reward: 256.119 [256.119, 256.119] - loss: 7.517 - mae: 42.109 - mean_q: 54.006 Interval 133 (66000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1001 Interval 134 (66500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2509 1 episodes - episode_reward: 174.242 [174.242, 174.242] - loss: 13.247 - mae: 42.177 - mean_q: 54.493 Interval 135 (67000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0939 Interval 136 (67500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5008 1 episodes - episode_reward: 163.938 [163.938, 163.938] - loss: 10.906 - mae: 42.043 - mean_q: 53.990 Interval 137 (68000 
steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2256 1 episodes - episode_reward: 240.052 [240.052, 240.052] - loss: 7.169 - mae: 41.489 - mean_q: 53.565 Interval 138 (68500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1944 Interval 139 (69000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2068 2 episodes - episode_reward: 84.754 [-53.768, 223.277] - loss: 6.668 - mae: 41.493 - mean_q: 53.964 Interval 140 (69500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1753 Interval 141 (70000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1719 Interval 142 (70500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2361 2 episodes - episode_reward: 0.199 [-297.694, 298.093] - loss: 12.117 - mae: 41.350 - mean_q: 53.323 Interval 143 (71000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2606 2 episodes - episode_reward: -117.467 [-118.163, -116.771] - loss: 7.723 - mae: 40.729 - mean_q: 52.726 Interval 144 (71500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0318 Interval 145 (72000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0372 Interval 146 (72500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0846 Interval 147 (73000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3523 1 episodes - episode_reward: 244.040 [244.040, 244.040] - loss: 8.371 - mae: 40.686 - mean_q: 52.616 Interval 148 (73500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1615 1 episodes - episode_reward: 162.025 [162.025, 162.025] - loss: 7.271 - mae: 40.468 - mean_q: 52.696 Interval 149 (74000 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: 0.2946 2 episodes - episode_reward: 42.517 [-101.864, 186.897] - loss: 5.943 - mae: 40.326 - mean_q: 52.500 Interval 150 (74500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1215 Interval 151 (75000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0032 Interval 152 (75500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3157 1 episodes - episode_reward: 216.113 [216.113, 216.113] - loss: 7.960 - mae: 40.702 - mean_q: 52.415 Interval 153 (76000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3466 1 episodes - episode_reward: 157.383 [157.383, 157.383] - loss: 8.022 - mae: 40.691 - mean_q: 52.563 Interval 154 (76500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4100 4 episodes - episode_reward: -32.761 [-381.055, 206.277] - loss: 6.160 - mae: 40.538 - mean_q: 52.422 Interval 155 (77000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7463 1 episodes - episode_reward: 228.501 [228.501, 228.501] - loss: 12.775 - mae: 40.391 - mean_q: 51.878 Interval 156 (77500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2622 2 episodes - episode_reward: 120.250 [-26.288, 266.789] - loss: 6.936 - mae: 40.429 - mean_q: 52.728 Interval 157 (78000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6323 1 episodes - episode_reward: -287.893 [-287.893, -287.893] - loss: 7.566 - mae: 39.927 - mean_q: 51.910 Interval 158 (78500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1957 2 episodes - episode_reward: 0.298 [-115.466, 116.062] - loss: 8.048 - mae: 39.682 - mean_q: 51.728 Interval 159 (79000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1273 Interval 160 (79500 steps performed) 500/500 [==============================] - 
3s 6ms/step - reward: 0.5097 1 episodes - episode_reward: 268.223 [268.223, 268.223] - loss: 5.888 - mae: 39.838 - mean_q: 51.842 Interval 161 (80000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2994 1 episodes - episode_reward: 255.963 [255.963, 255.963] - loss: 5.261 - mae: 39.743 - mean_q: 51.444 Interval 162 (80500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5190 1 episodes - episode_reward: 243.986 [243.986, 243.986] - loss: 6.256 - mae: 39.763 - mean_q: 51.585 Interval 163 (81000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5785 3 episodes - episode_reward: 115.297 [-130.464, 269.132] - loss: 7.701 - mae: 39.827 - mean_q: 51.380 Interval 164 (81500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0663 Interval 165 (82000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1431 1 episodes - episode_reward: 82.627 [82.627, 82.627] - loss: 5.480 - mae: 39.578 - mean_q: 50.915 Interval 166 (82500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3523 1 episodes - episode_reward: 232.670 [232.670, 232.670] - loss: 5.722 - mae: 39.683 - mean_q: 51.033 Interval 167 (83000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1847 2 episodes - episode_reward: -131.733 [-163.467, -100.000] - loss: 7.716 - mae: 39.686 - mean_q: 50.752 Interval 168 (83500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5975 2 episodes - episode_reward: 214.851 [182.863, 246.840] - loss: 6.135 - mae: 39.698 - mean_q: 50.535 Interval 169 (84000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1568 Interval 170 (84500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4779 1 episodes - episode_reward: 187.733 [187.733, 187.733] - loss: 8.739 
- mae: 39.983 - mean_q: 51.097 Interval 171 (85000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1917 1 episodes - episode_reward: 236.969 [236.969, 236.969] - loss: 8.449 - mae: 40.178 - mean_q: 51.193 Interval 172 (85500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4498 1 episodes - episode_reward: 168.077 [168.077, 168.077] - loss: 9.294 - mae: 40.492 - mean_q: 50.894 Interval 173 (86000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0811 Interval 174 (86500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6044 2 episodes - episode_reward: 154.968 [117.817, 192.119] - loss: 8.251 - mae: 40.663 - mean_q: 51.850 Interval 175 (87000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5183 1 episodes - episode_reward: 250.216 [250.216, 250.216] - loss: 6.179 - mae: 40.657 - mean_q: 51.837 Interval 176 (87500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4319 4 episodes - episode_reward: -50.289 [-109.611, 30.141] - loss: 4.592 - mae: 41.176 - mean_q: 52.504 Interval 177 (88000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2790 2 episodes - episode_reward: 94.492 [-31.369, 220.353] - loss: 8.654 - mae: 41.188 - mean_q: 52.691 Interval 178 (88500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5078 1 episodes - episode_reward: 146.279 [146.279, 146.279] - loss: 6.602 - mae: 42.307 - mean_q: 54.562 Interval 179 (89000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6208 1 episodes - episode_reward: 258.930 [258.930, 258.930] - loss: 6.671 - mae: 42.119 - mean_q: 53.939 Interval 180 (89500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1426 Interval 181 (90000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.0121 Interval 182 (90500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0360 Interval 183 (91000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1444 2 episodes - episode_reward: -27.164 [-186.579, 132.251] - loss: 7.721 - mae: 42.529 - mean_q: 54.276 Interval 184 (91500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3853 1 episodes - episode_reward: 275.916 [275.916, 275.916] - loss: 8.685 - mae: 42.776 - mean_q: 55.062 Interval 185 (92000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0346 3 episodes - episode_reward: 12.905 [-92.302, 182.554] - loss: 5.714 - mae: 43.235 - mean_q: 55.788 Interval 186 (92500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5172 1 episodes - episode_reward: 255.163 [255.163, 255.163] - loss: 6.523 - mae: 43.462 - mean_q: 56.231 Interval 187 (93000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1253 1 episodes - episode_reward: -35.798 [-35.798, -35.798] - loss: 7.067 - mae: 43.730 - mean_q: 56.548 Interval 188 (93500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1181 3 episodes - episode_reward: 44.655 [-111.891, 254.498] - loss: 7.210 - mae: 43.885 - mean_q: 56.431 Interval 189 (94000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0979 Interval 190 (94500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0071 2 episodes - episode_reward: 67.398 [-102.204, 237.000] - loss: 8.538 - mae: 43.217 - mean_q: 56.197 Interval 191 (95000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5603 2 episodes - episode_reward: 100.143 [-31.907, 232.194] - loss: 7.414 - mae: 43.465 - mean_q: 56.500 Interval 192 (95500 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0238 2 episodes - episode_reward: -1.451 [-52.272, 49.371] - loss: 9.368 - mae: 43.365 - mean_q: 56.521 Interval 193 (96000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7878 2 episodes - episode_reward: 227.181 [208.593, 245.768] - loss: 8.568 - mae: 43.798 - mean_q: 57.021 Interval 194 (96500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7287 1 episodes - episode_reward: 264.871 [264.871, 264.871] - loss: 8.072 - mae: 43.808 - mean_q: 57.074 Interval 195 (97000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1497 2 episodes - episode_reward: 67.273 [-99.223, 233.769] - loss: 9.058 - mae: 43.629 - mean_q: 56.716 Interval 196 (97500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0827 Interval 197 (98000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5448 2 episodes - episode_reward: 75.402 [28.004, 122.800] - loss: 8.786 - mae: 43.735 - mean_q: 56.984 Interval 198 (98500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2847 1 episodes - episode_reward: 227.980 [227.980, 227.980] - loss: 10.533 - mae: 43.399 - mean_q: 56.459 Interval 199 (99000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9600 3 episodes - episode_reward: -175.233 [-585.053, 158.871] - loss: 10.501 - mae: 43.306 - mean_q: 56.204 Interval 200 (99500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5437 1 episodes - episode_reward: 227.487 [227.487, 227.487] - loss: 7.689 - mae: 43.289 - mean_q: 56.124 Interval 201 (100000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5065 3 episodes - episode_reward: 137.116 [-95.628, 258.863] - loss: 9.186 - mae: 43.200 - mean_q: 56.243 Interval 202 (100500 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6399 1 episodes - episode_reward: 267.160 [267.160, 267.160] - loss: 8.632 - mae: 43.482 - mean_q: 56.365 Interval 203 (101000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2372 1 episodes - episode_reward: 167.550 [167.550, 167.550] - loss: 7.512 - mae: 43.510 - mean_q: 56.448 Interval 204 (101500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4768 1 episodes - episode_reward: 215.118 [215.118, 215.118] - loss: 4.872 - mae: 43.329 - mean_q: 56.205 Interval 205 (102000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6843 1 episodes - episode_reward: 223.109 [223.109, 223.109] - loss: 6.955 - mae: 43.699 - mean_q: 56.776 Interval 206 (102500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1418 2 episodes - episode_reward: 39.026 [-164.279, 242.330] - loss: 7.718 - mae: 44.016 - mean_q: 56.504 Interval 207 (103000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5541 1 episodes - episode_reward: 225.461 [225.461, 225.461] - loss: 8.520 - mae: 43.607 - mean_q: 56.537 Interval 208 (103500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5136 1 episodes - episode_reward: 280.404 [280.404, 280.404] - loss: 9.511 - mae: 43.682 - mean_q: 56.482 Interval 209 (104000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.9256 2 episodes - episode_reward: 277.039 [255.633, 298.445] - loss: 7.646 - mae: 44.021 - mean_q: 57.059 Interval 210 (104500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4513 1 episodes - episode_reward: 226.981 [226.981, 226.981] - loss: 7.916 - mae: 43.802 - mean_q: 56.499 Interval 211 (105000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0161 Interval 
212 (105500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0738 Interval 213 (106000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4457 1 episodes - episode_reward: 128.338 [128.338, 128.338] - loss: 5.859 - mae: 43.169 - mean_q: 56.601 Interval 214 (106500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3214 1 episodes - episode_reward: 236.869 [236.869, 236.869] - loss: 6.603 - mae: 43.404 - mean_q: 56.673 Interval 215 (107000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5224 1 episodes - episode_reward: 195.654 [195.654, 195.654] - loss: 6.392 - mae: 43.438 - mean_q: 56.475 Interval 216 (107500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4946 1 episodes - episode_reward: 260.271 [260.271, 260.271] - loss: 6.231 - mae: 43.406 - mean_q: 56.370 Interval 217 (108000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3275 1 episodes - episode_reward: 203.084 [203.084, 203.084] - loss: 5.353 - mae: 42.872 - mean_q: 55.991 Interval 218 (108500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0248 Interval 219 (109000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2438 2 episodes - episode_reward: 73.874 [-28.088, 175.836] - loss: 8.969 - mae: 42.797 - mean_q: 56.038 Interval 220 (109500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0663 Interval 221 (110000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5020 1 episodes - episode_reward: 188.813 [188.813, 188.813] - loss: 6.077 - mae: 42.644 - mean_q: 55.982 Interval 222 (110500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2851 1 episodes - episode_reward: 258.444 [258.444, 258.444] - loss: 7.550 - mae: 42.736 - 
mean_q: 56.252 Interval 223 (111000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3635 2 episodes - episode_reward: 50.188 [-98.594, 198.969] - loss: 7.201 - mae: 42.624 - mean_q: 56.167 Interval 224 (111500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1952 1 episodes - episode_reward: 254.764 [254.764, 254.764] - loss: 5.631 - mae: 43.264 - mean_q: 57.020 Interval 225 (112000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3020 Interval 226 (112500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0420 Interval 227 (113000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8897 2 episodes - episode_reward: 238.936 [229.729, 248.142] - loss: 5.502 - mae: 43.668 - mean_q: 57.372 Interval 228 (113500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1172 Interval 229 (114000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5697 3 episodes - episode_reward: 136.004 [-103.609, 255.935] - loss: 8.402 - mae: 44.527 - mean_q: 58.220 Interval 230 (114500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.9124 1 episodes - episode_reward: 302.406 [302.406, 302.406] - loss: 7.869 - mae: 44.869 - mean_q: 58.594 Interval 231 (115000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3694 1 episodes - episode_reward: 282.774 [282.774, 282.774] - loss: 6.374 - mae: 44.480 - mean_q: 58.358 Interval 232 (115500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5889 1 episodes - episode_reward: 262.357 [262.357, 262.357] - loss: 9.451 - mae: 44.507 - mean_q: 58.373 Interval 233 (116000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1742 1 episodes - episode_reward: 201.414 [201.414, 201.414] - loss: 
8.081 - mae: 44.388 - mean_q: 58.121 Interval 234 (116500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5321 2 episodes - episode_reward: -180.910 [-253.721, -108.099] - loss: 5.825 - mae: 44.419 - mean_q: 58.251 Interval 235 (117000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1421 1 episodes - episode_reward: 170.885 [170.885, 170.885] - loss: 8.233 - mae: 44.750 - mean_q: 58.961 Interval 236 (117500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3180 1 episodes - episode_reward: 170.575 [170.575, 170.575] - loss: 8.254 - mae: 45.297 - mean_q: 59.357 Interval 237 (118000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6636 1 episodes - episode_reward: 226.358 [226.358, 226.358] - loss: 8.232 - mae: 45.693 - mean_q: 60.156 Interval 238 (118500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5659 1 episodes - episode_reward: 269.990 [269.990, 269.990] - loss: 7.397 - mae: 45.968 - mean_q: 60.092 Interval 239 (119000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5109 2 episodes - episode_reward: 182.684 [156.573, 208.795] - loss: 8.317 - mae: 45.896 - mean_q: 60.059 Interval 240 (119500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1760 Interval 241 (120000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5516 1 episodes - episode_reward: 167.336 [167.336, 167.336] - loss: 8.289 - mae: 45.802 - mean_q: 60.183 Interval 242 (120500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0261 Interval 243 (121000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7264 2 episodes - episode_reward: 280.915 [253.267, 308.562] - loss: 8.148 - mae: 46.001 - mean_q: 60.386 Interval 244 (121500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.3461 3 episodes - episode_reward: -91.654 [-110.546, -58.922] - loss: 6.435 - mae: 46.316 - mean_q: 60.942 Interval 245 (122000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6332 3 episodes - episode_reward: 110.620 [-112.277, 232.481] - loss: 9.197 - mae: 46.780 - mean_q: 61.452 Interval 246 (122500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1348 2 episodes - episode_reward: 65.807 [-109.472, 241.087] - loss: 9.516 - mae: 47.079 - mean_q: 61.814 Interval 247 (123000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4656 1 episodes - episode_reward: 214.504 [214.504, 214.504] - loss: 5.728 - mae: 47.149 - mean_q: 61.788 Interval 248 (123500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8514 2 episodes - episode_reward: 223.153 [207.513, 238.793] - loss: 10.823 - mae: 47.454 - mean_q: 62.219 Interval 249 (124000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7114 1 episodes - episode_reward: 252.628 [252.628, 252.628] - loss: 7.710 - mae: 47.322 - mean_q: 62.151 Interval 250 (124500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3943 1 episodes - episode_reward: 232.589 [232.589, 232.589] - loss: 5.819 - mae: 47.404 - mean_q: 62.279 Interval 251 (125000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2971 4 episodes - episode_reward: 49.785 [-152.747, 269.269] - loss: 9.557 - mae: 47.429 - mean_q: 62.471 Interval 252 (125500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0763 Interval 253 (126000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0341 Interval 254 (126500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3511 1 episodes - 
episode_reward: 166.922 [166.922, 166.922] - loss: 6.792 - mae: 47.319 - mean_q: 62.336 Interval 255 (127000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3849 1 episodes - episode_reward: 174.782 [174.782, 174.782] - loss: 6.672 - mae: 47.208 - mean_q: 62.369 Interval 256 (127500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6243 3 episodes - episode_reward: 103.862 [-115.107, 232.741] - loss: 7.342 - mae: 47.371 - mean_q: 62.282 Interval 257 (128000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6220 1 episodes - episode_reward: 293.317 [293.317, 293.317] - loss: 6.842 - mae: 47.581 - mean_q: 62.717 Interval 258 (128500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3852 1 episodes - episode_reward: 247.146 [247.146, 247.146] - loss: 10.828 - mae: 47.185 - mean_q: 62.311 Interval 259 (129000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0370 Interval 260 (129500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3010 1 episodes - episode_reward: 172.088 [172.088, 172.088] - loss: 7.949 - mae: 47.427 - mean_q: 62.391 Interval 261 (130000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0438 2 episodes - episode_reward: -89.442 [-151.732, -27.151] - loss: 8.261 - mae: 47.417 - mean_q: 62.334 Interval 262 (130500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5254 1 episodes - episode_reward: 310.071 [310.071, 310.071] - loss: 8.407 - mae: 47.275 - mean_q: 62.046 Interval 263 (131000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1131 2 episodes - episode_reward: 3.251 [-255.323, 261.825] - loss: 7.069 - mae: 47.643 - mean_q: 62.988 Interval 264 (131500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3744 
1 episodes - episode_reward: 218.347 [218.347, 218.347] - loss: 7.864 - mae: 47.833 - mean_q: 63.073 Interval 265 (132000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0243 2 episodes - episode_reward: 21.029 [-143.665, 185.723] - loss: 6.838 - mae: 47.190 - mean_q: 62.271 Interval 266 (132500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7154 2 episodes - episode_reward: -164.674 [-173.121, -156.227] - loss: 9.332 - mae: 46.915 - mean_q: 61.841 Interval 267 (133000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0274 Interval 268 (133500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6238 1 episodes - episode_reward: 99.597 [99.597, 99.597] - loss: 9.706 - mae: 46.042 - mean_q: 60.298 Interval 269 (134000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6610 2 episodes - episode_reward: 255.416 [192.857, 317.976] - loss: 9.685 - mae: 46.167 - mean_q: 60.557 Interval 270 (134500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0230 Interval 271 (135000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4427 1 episodes - episode_reward: 141.876 [141.876, 141.876] - loss: 6.835 - mae: 46.056 - mean_q: 60.485 Interval 272 (135500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0930 2 episodes - episode_reward: 33.186 [-157.963, 224.335] - loss: 6.126 - mae: 46.005 - mean_q: 60.379 Interval 273 (136000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8357 2 episodes - episode_reward: 201.414 [181.968, 220.859] - loss: 6.594 - mae: 46.187 - mean_q: 60.708 Interval 274 (136500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4977 1 episodes - episode_reward: 308.635 [308.635, 308.635] - loss: 7.145 - mae: 46.369 - 
mean_q: 60.379 Interval 275 (137000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4631 1 episodes - episode_reward: 209.959 [209.959, 209.959] - loss: 7.845 - mae: 46.287 - mean_q: 60.381 Interval 276 (137500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1786 2 episodes - episode_reward: 39.297 [-100.000, 178.593] - loss: 7.723 - mae: 45.860 - mean_q: 60.013 Interval 277 (138000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0809 2 episodes - episode_reward: 61.286 [-103.815, 226.386] - loss: 6.614 - mae: 45.661 - mean_q: 59.943 Interval 278 (138500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7577 1 episodes - episode_reward: 218.467 [218.467, 218.467] - loss: 8.792 - mae: 45.672 - mean_q: 59.776 Interval 279 (139000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4991 1 episodes - episode_reward: 271.920 [271.920, 271.920] - loss: 6.649 - mae: 45.636 - mean_q: 59.742 Interval 280 (139500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1806 1 episodes - episode_reward: 248.193 [248.193, 248.193] - loss: 9.593 - mae: 45.642 - mean_q: 59.785 Interval 281 (140000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1837 Interval 282 (140500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1930 Interval 283 (141000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1603 Interval 284 (141500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3372 1 episodes - episode_reward: -145.564 [-145.564, -145.564] - loss: 6.365 - mae: 45.142 - mean_q: 59.094 Interval 285 (142000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4406 1 episodes - episode_reward: 182.064 [182.064, 182.064] - 
loss: 7.056 - mae: 44.763 - mean_q: 58.593 Interval 286 (142500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3119 1 episodes - episode_reward: 184.227 [184.227, 184.227] - loss: 5.043 - mae: 44.821 - mean_q: 58.717 Interval 287 (143000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4561 1 episodes - episode_reward: 168.935 [168.935, 168.935] - loss: 12.020 - mae: 44.581 - mean_q: 58.395 Interval 288 (143500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5004 1 episodes - episode_reward: 204.769 [204.769, 204.769] - loss: 4.859 - mae: 44.495 - mean_q: 58.545 Interval 289 (144000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0257 Interval 290 (144500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3088 1 episodes - episode_reward: 272.692 [272.692, 272.692] - loss: 5.705 - mae: 44.397 - mean_q: 58.406 Interval 291 (145000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1177 2 episodes - episode_reward: 38.827 [-48.334, 125.987] - loss: 5.244 - mae: 44.148 - mean_q: 57.994 Interval 292 (145500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1203 Interval 293 (146000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5066 3 episodes - episode_reward: 95.307 [-136.059, 235.815] - loss: 6.162 - mae: 43.695 - mean_q: 57.277 Interval 294 (146500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2700 2 episodes - episode_reward: 35.192 [-102.337, 172.720] - loss: 9.106 - mae: 44.189 - mean_q: 57.666 Interval 295 (147000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4018 2 episodes - episode_reward: 100.181 [-19.971, 220.333] - loss: 6.059 - mae: 44.113 - mean_q: 57.735 Interval 296 (147500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.0273 2 episodes - episode_reward: 63.394 [-105.876, 232.664] - loss: 6.318 - mae: 43.816 - mean_q: 57.398 Interval 297 (148000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2271 1 episodes - episode_reward: 0.126 [0.126, 0.126] - loss: 5.376 - mae: 43.960 - mean_q: 57.691 Interval 298 (148500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0935 2 episodes - episode_reward: 78.171 [-86.968, 243.310] - loss: 7.274 - mae: 43.731 - mean_q: 56.998 Interval 299 (149000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5897 2 episodes - episode_reward: 95.811 [-71.672, 263.294] - loss: 6.623 - mae: 44.165 - mean_q: 57.646 Interval 300 (149500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5634 1 episodes - episode_reward: 222.986 [222.986, 222.986] - loss: 5.113 - mae: 44.296 - mean_q: 57.848 Interval 301 (150000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2396 2 episodes - episode_reward: -68.457 [-77.216, -59.698] - loss: 8.260 - mae: 43.745 - mean_q: 57.266 Interval 302 (150500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0088 Interval 303 (151000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1345 1 episodes - episode_reward: 183.514 [183.514, 183.514] - loss: 5.767 - mae: 43.774 - mean_q: 57.125 Interval 304 (151500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5157 1 episodes - episode_reward: 151.564 [151.564, 151.564] - loss: 7.582 - mae: 43.510 - mean_q: 56.845 Interval 305 (152000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0910 3 episodes - episode_reward: -3.992 [-158.947, 202.092] - loss: 6.054 - mae: 43.436 - mean_q: 56.500 Interval 306 (152500 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: -0.0241 2 episodes - episode_reward: -13.245 [-250.340, 223.849] - loss: 8.538 - mae: 43.572 - mean_q: 57.014 Interval 307 (153000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0049 Interval 308 (153500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2703 1 episodes - episode_reward: 144.983 [144.983, 144.983] - loss: 8.390 - mae: 42.892 - mean_q: 56.199 Interval 309 (154000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0134 Interval 310 (154500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2265 1 episodes - episode_reward: 138.586 [138.586, 138.586] - loss: 5.522 - mae: 43.008 - mean_q: 56.092 Interval 311 (155000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0450 2 episodes - episode_reward: -45.196 [-233.290, 142.898] - loss: 6.845 - mae: 42.842 - mean_q: 55.751 Interval 312 (155500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4584 1 episodes - episode_reward: 240.755 [240.755, 240.755] - loss: 7.547 - mae: 43.578 - mean_q: 56.888 Interval 313 (156000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5270 1 episodes - episode_reward: 208.561 [208.561, 208.561] - loss: 9.400 - mae: 43.555 - mean_q: 57.106 Interval 314 (156500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5353 3 episodes - episode_reward: 159.804 [-63.408, 273.398] - loss: 8.321 - mae: 43.521 - mean_q: 56.822 Interval 315 (157000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3459 1 episodes - episode_reward: 190.024 [190.024, 190.024] - loss: 5.478 - mae: 43.415 - mean_q: 56.622 Interval 316 (157500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0858 1 episodes - 
episode_reward: -95.164 [-95.164, -95.164] - loss: 10.974 - mae: 43.142 - mean_q: 56.064 Interval 317 (158000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8290 2 episodes - episode_reward: 221.391 [180.467, 262.314] - loss: 9.815 - mae: 43.201 - mean_q: 56.286 Interval 318 (158500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1247 2 episodes - episode_reward: 47.522 [-142.492, 237.536] - loss: 8.704 - mae: 43.820 - mean_q: 57.486 Interval 319 (159000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4714 1 episodes - episode_reward: 233.002 [233.002, 233.002] - loss: 8.228 - mae: 43.359 - mean_q: 56.392 Interval 320 (159500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0581 3 episodes - episode_reward: 18.105 [-100.000, 203.002] - loss: 8.685 - mae: 43.451 - mean_q: 56.575 Interval 321 (160000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6261 1 episodes - episode_reward: 235.873 [235.873, 235.873] - loss: 6.696 - mae: 42.984 - mean_q: 56.136 Interval 322 (160500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0648 Interval 323 (161000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4324 1 episodes - episode_reward: 128.591 [128.591, 128.591] - loss: 9.267 - mae: 43.196 - mean_q: 56.121 Interval 324 (161500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3088 1 episodes - episode_reward: 208.524 [208.524, 208.524] - loss: 9.302 - mae: 43.162 - mean_q: 56.005 Interval 325 (162000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8212 3 episodes - episode_reward: -135.702 [-321.291, 9.178] - loss: 8.555 - mae: 43.013 - mean_q: 55.647 Interval 326 (162500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3863 
4 episodes - episode_reward: -57.395 [-171.507, 173.846] - loss: 8.736 - mae: 43.117 - mean_q: 55.542 Interval 327 (163000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3130 5 episodes - episode_reward: -12.276 [-127.383, 210.267] - loss: 9.876 - mae: 43.396 - mean_q: 56.128 Interval 328 (163500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4910 1 episodes - episode_reward: 249.703 [249.703, 249.703] - loss: 7.908 - mae: 43.536 - mean_q: 56.262 Interval 329 (164000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2886 2 episodes - episode_reward: 69.044 [-152.958, 291.045] - loss: 8.989 - mae: 43.747 - mean_q: 56.136 Interval 330 (164500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1440 Interval 331 (165000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7255 2 episodes - episode_reward: 186.728 [184.513, 188.944] - loss: 9.781 - mae: 43.035 - mean_q: 55.343 Interval 332 (165500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4845 1 episodes - episode_reward: 201.464 [201.464, 201.464] - loss: 6.514 - mae: 43.573 - mean_q: 55.952 Interval 333 (166000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2120 2 episodes - episode_reward: -6.796 [-240.538, 226.946] - loss: 6.714 - mae: 43.330 - mean_q: 55.912 Interval 334 (166500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6725 1 episodes - episode_reward: 284.540 [284.540, 284.540] - loss: 8.348 - mae: 43.374 - mean_q: 55.960 Interval 335 (167000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0656 2 episodes - episode_reward: 0.892 [-247.320, 249.104] - loss: 10.645 - mae: 43.352 - mean_q: 56.160 Interval 336 (167500 steps performed) 500/500 [==============================] - 3s 5ms/step - 
reward: 0.5975 2 episodes - episode_reward: 115.877 [-1.873, 233.627] - loss: 14.848 - mae: 43.241 - mean_q: 56.095 Interval 337 (168000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2875 2 episodes - episode_reward: 122.545 [-29.971, 275.060] - loss: 6.887 - mae: 42.855 - mean_q: 55.593 Interval 338 (168500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4477 2 episodes - episode_reward: 84.282 [-38.585, 207.149] - loss: 10.209 - mae: 42.895 - mean_q: 55.547 Interval 339 (169000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2868 2 episodes - episode_reward: -109.946 [-118.127, -101.764] - loss: 12.190 - mae: 43.105 - mean_q: 55.625 Interval 340 (169500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6226 2 episodes - episode_reward: 230.981 [220.950, 241.013] - loss: 7.926 - mae: 43.166 - mean_q: 55.605 Interval 341 (170000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2222 4 episodes - episode_reward: -26.115 [-100.000, 59.029] - loss: 7.157 - mae: 42.998 - mean_q: 55.223 Interval 342 (170500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4618 2 episodes - episode_reward: -170.159 [-178.569, -161.749] - loss: 8.483 - mae: 42.809 - mean_q: 54.694 Interval 343 (171000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6128 1 episodes - episode_reward: 229.099 [229.099, 229.099] - loss: 9.765 - mae: 42.979 - mean_q: 55.008 Interval 344 (171500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1327 4 episodes - episode_reward: 28.684 [-122.198, 269.443] - loss: 10.875 - mae: 42.790 - mean_q: 54.440 Interval 345 (172000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6207 1 episodes - episode_reward: 190.568 [190.568, 190.568] - loss: 8.700 - 
mae: 42.364 - mean_q: 54.277 Interval 346 (172500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0167 Interval 347 (173000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2577 2 episodes - episode_reward: 92.302 [-15.311, 199.915] - loss: 10.126 - mae: 41.734 - mean_q: 53.248 Interval 348 (173500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2899 1 episodes - episode_reward: 176.519 [176.519, 176.519] - loss: 10.673 - mae: 42.008 - mean_q: 53.608 Interval 349 (174000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9927 2 episodes - episode_reward: 253.307 [245.089, 261.525] - loss: 10.434 - mae: 41.825 - mean_q: 53.115 Interval 350 (174500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3891 2 episodes - episode_reward: 45.773 [-135.245, 226.791] - loss: 13.071 - mae: 41.810 - mean_q: 53.167 Interval 351 (175000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5810 4 episodes - episode_reward: -31.051 [-300.554, 299.405] - loss: 9.690 - mae: 41.655 - mean_q: 53.247 Interval 352 (175500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5885 2 episodes - episode_reward: 105.358 [-41.057, 251.772] - loss: 10.351 - mae: 41.477 - mean_q: 53.026 Interval 353 (176000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8878 2 episodes - episode_reward: 222.655 [222.245, 223.066] - loss: 12.336 - mae: 41.415 - mean_q: 52.713 Interval 354 (176500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0441 4 episodes - episode_reward: 9.200 [-107.834, 261.858] - loss: 10.570 - mae: 41.602 - mean_q: 53.091 Interval 355 (177000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0307 4 episodes - episode_reward: 11.876 [-100.000, 
225.738] - loss: 8.674 - mae: 41.705 - mean_q: 53.440 Interval 356 (177500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0254 Interval 357 (178000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4504 2 episodes - episode_reward: -154.311 [-165.075, -143.547] - loss: 12.147 - mae: 41.410 - mean_q: 52.737 Interval 358 (178500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4635 2 episodes - episode_reward: 148.456 [21.683, 275.229] - loss: 13.153 - mae: 41.695 - mean_q: 53.285 Interval 359 (179000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4228 3 episodes - episode_reward: -234.781 [-435.113, -2.605] - loss: 13.952 - mae: 41.853 - mean_q: 53.362 Interval 360 (179500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1074 1 episodes - episode_reward: -99.757 [-99.757, -99.757] - loss: 10.486 - mae: 41.896 - mean_q: 53.152 Interval 361 (180000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0194 Interval 362 (180500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4049 1 episodes - episode_reward: 243.431 [243.431, 243.431] - loss: 10.350 - mae: 41.578 - mean_q: 53.258 Interval 363 (181000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1592 Interval 364 (181500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3054 1 episodes - episode_reward: 143.932 [143.932, 143.932] - loss: 8.876 - mae: 41.394 - mean_q: 53.614 Interval 365 (182000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4923 2 episodes - episode_reward: -121.739 [-225.564, -17.913] - loss: 10.468 - mae: 41.583 - mean_q: 53.831 Interval 366 (182500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1139 Interval 367 
(183000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4386 1 episodes - episode_reward: 187.664 [187.664, 187.664] - loss: 9.373 - mae: 42.170 - mean_q: 54.364 Interval 368 (183500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1723 2 episodes - episode_reward: -23.255 [-262.308, 215.799] - loss: 9.439 - mae: 42.301 - mean_q: 54.511 Interval 369 (184000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1474 2 episodes - episode_reward: 67.820 [-67.286, 202.927] - loss: 12.171 - mae: 42.253 - mean_q: 54.593 Interval 370 (184500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3630 2 episodes - episode_reward: 98.787 [-45.188, 242.763] - loss: 9.045 - mae: 42.683 - mean_q: 54.921 Interval 371 (185000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2654 1 episodes - episode_reward: -206.069 [-206.069, -206.069] - loss: 9.926 - mae: 42.680 - mean_q: 54.987 Interval 372 (185500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0963 2 episodes - episode_reward: 12.868 [-196.518, 222.255] - loss: 10.235 - mae: 42.852 - mean_q: 54.957 Interval 373 (186000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0665 1 episodes - episode_reward: 217.687 [217.687, 217.687] - loss: 9.336 - mae: 42.812 - mean_q: 54.857 Interval 374 (186500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4241 1 episodes - episode_reward: -390.723 [-390.723, -390.723] - loss: 13.246 - mae: 43.116 - mean_q: 55.197 Interval 375 (187000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1121 2 episodes - episode_reward: -1.130 [-195.828, 193.567] - loss: 13.831 - mae: 43.061 - mean_q: 54.776 Interval 376 (187500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -0.8055 2 episodes - episode_reward: -174.301 [-276.083, -72.519] - loss: 13.782 - mae: 43.033 - mean_q: 54.691 Interval 377 (188000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0551 Interval 378 (188500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0558 2 episodes - episode_reward: 8.940 [-90.041, 107.920] - loss: 12.102 - mae: 42.860 - mean_q: 53.986 Interval 379 (189000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0741 Interval 380 (189500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3207 2 episodes - episode_reward: 23.699 [-105.763, 153.161] - loss: 14.076 - mae: 43.247 - mean_q: 54.407 Interval 381 (190000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5271 1 episodes - episode_reward: 258.687 [258.687, 258.687] - loss: 9.204 - mae: 43.065 - mean_q: 54.629 Interval 382 (190500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3006 1 episodes - episode_reward: 269.732 [269.732, 269.732] - loss: 12.359 - mae: 43.131 - mean_q: 54.678 Interval 383 (191000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1757 Interval 384 (191500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2126 1 episodes - episode_reward: 246.026 [246.026, 246.026] - loss: 9.960 - mae: 43.353 - mean_q: 54.882 Interval 385 (192000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2284 Interval 386 (192500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4086 1 episodes - episode_reward: 237.448 [237.448, 237.448] - loss: 11.824 - mae: 43.707 - mean_q: 55.298 Interval 387 (193000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1069 2 episodes - episode_reward: 45.013 [-108.817, 198.842] - 
loss: 11.856 - mae: 43.390 - mean_q: 54.801 Interval 388 (193500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0869 Interval 389 (194000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3263 1 episodes - episode_reward: 190.205 [190.205, 190.205] - loss: 10.844 - mae: 43.224 - mean_q: 55.041 Interval 390 (194500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7900 2 episodes - episode_reward: 203.360 [163.522, 243.198] - loss: 10.805 - mae: 43.397 - mean_q: 54.996 Interval 391 (195000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2362 1 episodes - episode_reward: 163.600 [163.600, 163.600] - loss: 9.545 - mae: 43.555 - mean_q: 55.677 Interval 392 (195500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1528 Interval 393 (196000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3389 1 episodes - episode_reward: 198.304 [198.304, 198.304] - loss: 13.248 - mae: 43.606 - mean_q: 55.822 Interval 394 (196500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4899 1 episodes - episode_reward: 249.453 [249.453, 249.453] - loss: 9.352 - mae: 43.638 - mean_q: 55.835 Interval 395 (197000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0609 Interval 396 (197500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4240 3 episodes - episode_reward: 109.037 [-156.046, 303.177] - loss: 8.256 - mae: 43.771 - mean_q: 55.940 Interval 397 (198000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2968 2 episodes - episode_reward: 55.465 [-151.543, 262.472] - loss: 9.753 - mae: 43.419 - mean_q: 55.337 Interval 398 (198500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4494 3 episodes - episode_reward: 
-95.508 [-109.599, -81.967] - loss: 11.113 - mae: 43.536 - mean_q: 54.994 Interval 399 (199000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5978 3 episodes - episode_reward: 125.775 [11.987, 201.290] - loss: 11.586 - mae: 43.647 - mean_q: 55.141 Interval 400 (199500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1510 3 episodes - episode_reward: 11.379 [-118.160, 211.377] - loss: 11.258 - mae: 44.002 - mean_q: 55.776 Interval 401 (200000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0401 2 episodes - episode_reward: -44.699 [-140.392, 50.995] - loss: 10.901 - mae: 43.894 - mean_q: 55.299 Interval 402 (200500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2980 2 episodes - episode_reward: 102.413 [-59.833, 264.660] - loss: 11.865 - mae: 44.236 - mean_q: 56.020 Interval 403 (201000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0459 1 episodes - episode_reward: -34.095 [-34.095, -34.095] - loss: 10.009 - mae: 44.457 - mean_q: 56.100 Interval 404 (201500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3812 1 episodes - episode_reward: 225.255 [225.255, 225.255] - loss: 10.345 - mae: 44.607 - mean_q: 56.159 Interval 405 (202000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3100 2 episodes - episode_reward: 51.368 [-112.799, 215.536] - loss: 10.642 - mae: 44.624 - mean_q: 56.468 Interval 406 (202500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0071 Interval 407 (203000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1023 Interval 408 (203500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3013 1 episodes - episode_reward: 144.531 [144.531, 144.531] - loss: 13.627 - mae: 44.767 - mean_q: 56.196 
Interval 409 (204000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0829 2 episodes - episode_reward: 54.301 [-85.953, 194.555] - loss: 9.236 - mae: 45.415 - mean_q: 57.098 Interval 410 (204500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0120 1 episodes - episode_reward: -95.048 [-95.048, -95.048] - loss: 10.528 - mae: 45.528 - mean_q: 57.892 Interval 411 (205000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2546 1 episodes - episode_reward: 200.663 [200.663, 200.663] - loss: 11.324 - mae: 45.898 - mean_q: 57.937 Interval 412 (205500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0345 3 episodes - episode_reward: 29.046 [-98.203, 241.830] - loss: 12.066 - mae: 45.941 - mean_q: 58.382 Interval 413 (206000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0211 1 episodes - episode_reward: -167.627 [-167.627, -167.627] - loss: 8.415 - mae: 46.183 - mean_q: 58.689 Interval 414 (206500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3904 1 episodes - episode_reward: 232.383 [232.383, 232.383] - loss: 11.213 - mae: 46.358 - mean_q: 58.723 Interval 415 (207000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 1.0078 3 episodes - episode_reward: 134.819 [-129.595, 276.403] - loss: 11.738 - mae: 46.268 - mean_q: 58.607 Interval 416 (207500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3880 1 episodes - episode_reward: 323.002 [323.002, 323.002] - loss: 8.261 - mae: 46.823 - mean_q: 59.607 Interval 417 (208000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1971 1 episodes - episode_reward: 141.592 [141.592, 141.592] - loss: 13.095 - mae: 46.461 - mean_q: 58.702 Interval 418 (208500 steps performed) 500/500 [==============================] - 3s 6ms/step 
- reward: 0.5331 1 episodes - episode_reward: 244.347 [244.347, 244.347] - loss: 10.757 - mae: 45.967 - mean_q: 59.061 Interval 419 (209000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6188 1 episodes - episode_reward: 236.033 [236.033, 236.033] - loss: 12.086 - mae: 46.318 - mean_q: 58.973 Interval 420 (209500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1937 1 episodes - episode_reward: 265.964 [265.964, 265.964] - loss: 7.739 - mae: 46.086 - mean_q: 58.851 Interval 421 (210000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0177 2 episodes - episode_reward: -87.292 [-131.227, -43.356] - loss: 9.260 - mae: 45.990 - mean_q: 58.698 Interval 422 (210500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3143 4 episodes - episode_reward: 52.393 [-147.332, 235.214] - loss: 9.462 - mae: 46.631 - mean_q: 59.551 Interval 423 (211000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6757 2 episodes - episode_reward: 210.115 [187.297, 232.934] - loss: 10.500 - mae: 46.692 - mean_q: 59.225 Interval 424 (211500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3163 3 episodes - episode_reward: -78.725 [-123.721, -47.867] - loss: 9.037 - mae: 46.893 - mean_q: 58.985 Interval 425 (212000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0684 1 episodes - episode_reward: 229.170 [229.170, 229.170] - loss: 11.709 - mae: 47.040 - mean_q: 59.069 Interval 426 (212500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5633 1 episodes - episode_reward: 155.509 [155.509, 155.509] - loss: 11.777 - mae: 46.935 - mean_q: 58.990 Interval 427 (213000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2965 Interval 428 (213500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.0453 Interval 429 (214000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1087 Interval 430 (214500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4530 1 episodes - episode_reward: 161.121 [161.121, 161.121] - loss: 8.980 - mae: 46.362 - mean_q: 58.433 Interval 431 (215000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5395 3 episodes - episode_reward: 137.106 [-77.750, 247.406] - loss: 11.767 - mae: 46.456 - mean_q: 58.710 Interval 432 (215500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2593 Interval 433 (216000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3622 1 episodes - episode_reward: 234.689 [234.689, 234.689] - loss: 10.160 - mae: 46.390 - mean_q: 58.512 Interval 434 (216500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1746 5 episodes - episode_reward: 21.900 [-107.623, 231.308] - loss: 12.760 - mae: 46.300 - mean_q: 58.620 Interval 435 (217000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0015 1 episodes - episode_reward: -34.715 [-34.715, -34.715] - loss: 9.711 - mae: 46.197 - mean_q: 57.636 Interval 436 (217500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3893 1 episodes - episode_reward: 194.304 [194.304, 194.304] - loss: 11.339 - mae: 46.434 - mean_q: 58.519 Interval 437 (218000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2244 1 episodes - episode_reward: 174.479 [174.479, 174.479] - loss: 8.501 - mae: 46.290 - mean_q: 58.516 Interval 438 (218500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7705 1 episodes - episode_reward: 259.770 [259.770, 259.770] - loss: 9.554 - mae: 45.923 - mean_q: 57.745 Interval 439 (219000 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2514 2 episodes - episode_reward: 147.550 [37.411, 257.688] - loss: 7.681 - mae: 46.556 - mean_q: 58.631 Interval 440 (219500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1893 Interval 441 (220000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0658 Interval 442 (220500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4265 2 episodes - episode_reward: 75.762 [2.652, 148.871] - loss: 10.475 - mae: 46.397 - mean_q: 58.929 Interval 443 (221000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5233 1 episodes - episode_reward: 301.151 [301.151, 301.151] - loss: 12.115 - mae: 46.500 - mean_q: 58.979 Interval 444 (221500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1565 1 episodes - episode_reward: 172.437 [172.437, 172.437] - loss: 10.222 - mae: 46.015 - mean_q: 58.560 Interval 445 (222000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2651 Interval 446 (222500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3535 3 episodes - episode_reward: -37.636 [-189.470, 207.699] - loss: 9.349 - mae: 45.621 - mean_q: 58.147 Interval 447 (223000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3589 2 episodes - episode_reward: -48.602 [-308.732, 211.529] - loss: 11.877 - mae: 45.744 - mean_q: 57.571 Interval 448 (223500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3058 1 episodes - episode_reward: -53.083 [-53.083, -53.083] - loss: 10.291 - mae: 46.074 - mean_q: 58.580 Interval 449 (224000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7258 2 episodes - episode_reward: 262.407 [258.501, 266.314] - loss: 7.635 - mae: 45.810 - mean_q: 
58.022 Interval 450 (224500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2939 2 episodes - episode_reward: 72.881 [-124.776, 270.538] - loss: 9.603 - mae: 46.049 - mean_q: 58.262 Interval 451 (225000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4728 2 episodes - episode_reward: 83.817 [-35.111, 202.744] - loss: 11.155 - mae: 45.998 - mean_q: 58.774 Interval 452 (225500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4714 2 episodes - episode_reward: 67.348 [-167.661, 302.356] - loss: 9.301 - mae: 46.305 - mean_q: 59.018 Interval 453 (226000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7070 2 episodes - episode_reward: 241.701 [199.171, 284.231] - loss: 9.779 - mae: 46.219 - mean_q: 58.721 Interval 454 (226500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5094 1 episodes - episode_reward: -304.467 [-304.467, -304.467] - loss: 12.463 - mae: 46.181 - mean_q: 58.789 Interval 455 (227000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8267 2 episodes - episode_reward: 222.178 [199.240, 245.117] - loss: 7.932 - mae: 46.569 - mean_q: 58.934 Interval 456 (227500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4377 3 episodes - episode_reward: 64.183 [-2.200, 170.120] - loss: 9.027 - mae: 46.443 - mean_q: 59.199 Interval 457 (228000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1182 3 episodes - episode_reward: 52.437 [-91.453, 216.953] - loss: 12.665 - mae: 46.410 - mean_q: 59.358 Interval 458 (228500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8142 1 episodes - episode_reward: 254.391 [254.391, 254.391] - loss: 10.476 - mae: 46.542 - mean_q: 59.677 Interval 459 (229000 steps performed) 500/500 [==============================] - 3s 
6ms/step - reward: 0.5076 1 episodes - episode_reward: 263.314 [263.314, 263.314] - loss: 10.427 - mae: 46.370 - mean_q: 59.583 Interval 460 (229500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7517 2 episodes - episode_reward: 249.953 [212.520, 287.386] - loss: 8.944 - mae: 46.289 - mean_q: 59.153 Interval 461 (230000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2112 Interval 462 (230500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4008 1 episodes - episode_reward: 220.572 [220.572, 220.572] - loss: 11.176 - mae: 46.310 - mean_q: 59.202 Interval 463 (231000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2540 1 episodes - episode_reward: 230.446 [230.446, 230.446] - loss: 8.592 - mae: 46.098 - mean_q: 59.047 Interval 464 (231500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2172 4 episodes - episode_reward: -153.809 [-259.056, -89.411] - loss: 10.932 - mae: 45.622 - mean_q: 58.256 Interval 465 (232000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0713 2 episodes - episode_reward: 31.700 [-112.275, 175.676] - loss: 10.606 - mae: 45.403 - mean_q: 57.687 Interval 466 (232500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1986 4 episodes - episode_reward: -27.534 [-134.267, 239.187] - loss: 9.590 - mae: 45.157 - mean_q: 57.196 Interval 467 (233000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1879 3 episodes - episode_reward: -55.880 [-114.488, 46.849] - loss: 9.705 - mae: 44.994 - mean_q: 57.306 Interval 468 (233500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0415 3 episodes - episode_reward: -33.118 [-162.214, 175.485] - loss: 14.636 - mae: 44.955 - mean_q: 56.821 Interval 469 (234000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.2416 1 episodes - episode_reward: 247.218 [247.218, 247.218] - loss: 11.405 - mae: 44.703 - mean_q: 56.680 Interval 470 (234500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1552 3 episodes - episode_reward: -22.900 [-123.609, 151.440] - loss: 13.003 - mae: 44.842 - mean_q: 56.660 Interval 471 (235000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4439 1 episodes - episode_reward: 259.632 [259.632, 259.632] - loss: 9.320 - mae: 44.837 - mean_q: 56.824 Interval 472 (235500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4572 1 episodes - episode_reward: 226.575 [226.575, 226.575] - loss: 9.814 - mae: 44.344 - mean_q: 56.452 Interval 473 (236000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4208 2 episodes - episode_reward: 174.105 [49.246, 298.963] - loss: 10.994 - mae: 44.222 - mean_q: 56.587 Interval 474 (236500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0693 3 episodes - episode_reward: -74.881 [-108.807, -57.411] - loss: 10.157 - mae: 44.362 - mean_q: 56.235 Interval 475 (237000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1464 3 episodes - episode_reward: 51.335 [-106.537, 273.901] - loss: 11.252 - mae: 44.065 - mean_q: 56.158 Interval 476 (237500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6985 2 episodes - episode_reward: 166.405 [20.212, 312.598] - loss: 7.797 - mae: 43.961 - mean_q: 55.951 Interval 477 (238000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2290 1 episodes - episode_reward: 186.361 [186.361, 186.361] - loss: 12.429 - mae: 44.049 - mean_q: 55.772 Interval 478 (238500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2883 2 episodes - episode_reward: 
103.318 [32.918, 173.718] - loss: 10.288 - mae: 43.774 - mean_q: 55.999 Interval 479 (239000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7658 1 episodes - episode_reward: -476.321 [-476.321, -476.321] - loss: 10.410 - mae: 44.189 - mean_q: 56.240 Interval 480 (239500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1730 2 episodes - episode_reward: -0.011 [-217.266, 217.245] - loss: 11.569 - mae: 44.543 - mean_q: 56.404 Interval 481 (240000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2469 1 episodes - episode_reward: -231.907 [-231.907, -231.907] - loss: 9.363 - mae: 44.262 - mean_q: 56.014 Interval 482 (240500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6819 2 episodes - episode_reward: 194.417 [193.625, 195.210] - loss: 9.551 - mae: 43.860 - mean_q: 55.317 Interval 483 (241000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1481 Interval 484 (241500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0058 Interval 485 (242000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2131 1 episodes - episode_reward: 212.077 [212.077, 212.077] - loss: 6.680 - mae: 44.305 - mean_q: 55.936 Interval 486 (242500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4834 1 episodes - episode_reward: 171.631 [171.631, 171.631] - loss: 10.677 - mae: 44.170 - mean_q: 55.319 Interval 487 (243000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0273 Interval 488 (243500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0717 Interval 489 (244000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1237 1 episodes - episode_reward: 133.147 [133.147, 133.147] - loss: 11.096 - mae: 43.760 - mean_q: 
55.404 Interval 490 (244500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3281 1 episodes - episode_reward: -189.519 [-189.519, -189.519] - loss: 8.821 - mae: 43.608 - mean_q: 55.339 Interval 491 (245000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3025 5 episodes - episode_reward: -229.876 [-384.976, -121.611] - loss: 10.282 - mae: 43.871 - mean_q: 55.098 Interval 492 (245500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1850 Interval 493 (246000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3924 1 episodes - episode_reward: -293.732 [-293.732, -293.732] - loss: 10.717 - mae: 43.511 - mean_q: 54.614 Interval 494 (246500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7905 2 episodes - episode_reward: -214.370 [-370.399, -58.340] - loss: 9.601 - mae: 43.432 - mean_q: 54.342 Interval 495 (247000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1993 Interval 496 (247500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0220 2 episodes - episode_reward: 54.504 [-127.498, 236.507] - loss: 13.173 - mae: 43.658 - mean_q: 54.554 Interval 497 (248000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5850 1 episodes - episode_reward: 278.748 [278.748, 278.748] - loss: 10.202 - mae: 43.833 - mean_q: 54.745 Interval 498 (248500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4748 1 episodes - episode_reward: -325.217 [-325.217, -325.217] - loss: 12.437 - mae: 43.670 - mean_q: 54.511 Interval 499 (249000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2852 3 episodes - episode_reward: -5.357 [-127.116, 205.457] - loss: 14.162 - mae: 43.983 - mean_q: 55.161 Interval 500 (249500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.3649 done, took 1508.220 seconds
# Persist Model 4's weights, keep a handle to the trained agent, and plot
# its learning curve (raw episode returns plus a smoothed rolling mean).
weights.append('dqn_lunar_weights_four.h5f')  # no placeholders, so no f-string needed
models.append(dqn)
# Reload the just-recorded weights file into the newest model entry.
# NOTE(review): this load happens before the save below — presumably the file
# already exists from an earlier checkpoint; verify against the prior cell.
models[-1].load_weights(weights[-1])
# weights_filename is assumed to have been set in an earlier cell — TODO confirm.
dqn.save_weights(weights_filename, overwrite=True)
df = pd.DataFrame(history.history)
# Raw per-episode returns in light gray, 50-episode rolling mean in black.
ax = df['episode_reward'].plot(color='lightgray')
df['episode_reward'].rolling(50).mean().plot(color='black')
ax.set_xlabel("Episode")
# Fixed: label previously said "(10)" but the rolling window above is 50.
plt.ylabel("Rolling Mean (50) Cumulative Return")
plt.show()
Increasing the window size from 4 to 6 has improved performance again. Although the rolling average is still in and around zero, we see big numbers, like +200, for the first time. It is clear, though, that this performance is unstable: we are still getting some -400 and -600 episode returns. Given that the model only trained for 250,000 steps, I think this is acceptable.
# Export the Model 4 training history so the learning curve can be re-examined later.
history_export_path = 'lunar_training_weights_4'
df.to_csv(history_export_path)
I will now let this model train for 2,000,000 steps. We will see if the instability resolves with a longer training time.
# Register Model 5's configuration: window size 6, 128/64/32 hidden layers,
# 2,000,000 training steps, 50,000 warm-up, 500-step log interval.
rl['Model 5'] = [6, '128/64/32', 2000000, 50000, 500, None]

# Output filenames for this run; '{step}' is a literal placeholder that the
# checkpoint callback fills in at save time.
weights_filename = 'dqn_lunar_weights_five.h5f'
checkpoint_weights_filename = 'dqn_lunar_weights_{step}.h5f'
log_filename = 'dqn_lunar_log.json'

# Checkpoint the weights every 250k steps and log metrics every 100 intervals.
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000),
    FileLogger(log_filename, interval=100),
]

# Time the full 2M-step training run.
start_time = time.time()
history = dqn.fit(env, callbacks=callbacks, nb_steps=2000000, log_interval=500)
end_time = time.time()
Training for 2000000 steps ... Interval 1 (0 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1147 1 episodes - episode_reward: -166.116 [-166.116, -166.116] - loss: 13.000 - mae: 44.508 - mean_q: 55.712 Interval 2 (500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5390 2 episodes - episode_reward: -57.334 [-331.303, 216.634] - loss: 11.020 - mae: 44.079 - mean_q: 55.477 Interval 3 (1000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1391 2 episodes - episode_reward: 9.995 [-155.479, 175.469] - loss: 12.129 - mae: 44.020 - mean_q: 54.997 Interval 4 (1500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3285 1 episodes - episode_reward: -287.423 [-287.423, -287.423] - loss: 12.539 - mae: 44.095 - mean_q: 55.346 Interval 5 (2000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1484 2 episodes - episode_reward: 27.853 [-212.395, 268.102] - loss: 11.636 - mae: 44.158 - mean_q: 55.234 Interval 6 (2500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2004 Interval 7 (3000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0435 Interval 8 (3500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4738 4 episodes - episode_reward: 64.750 [-115.991, 278.372] - loss: 15.584 - mae: 44.706 - mean_q: 55.276 Interval 9 (4000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2769 2 episodes - episode_reward: 78.115 [-89.824, 246.054] - loss: 11.805 - mae: 44.464 - mean_q: 55.103 Interval 10 (4500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3682 1 episodes - episode_reward: 227.545 [227.545, 227.545] - loss: 11.066 - mae: 44.166 - mean_q: 55.136 Interval 11 (5000 steps performed) 500/500 [==============================] - 4s 
7ms/step - reward: 0.7821 1 episodes - episode_reward: 223.221 [223.221, 223.221] - loss: 11.523 - mae: 44.638 - mean_q: 55.359 Interval 12 (5500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3388 1 episodes - episode_reward: 259.928 [259.928, 259.928] - loss: 11.218 - mae: 44.890 - mean_q: 55.869 Interval 13 (6000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2856 1 episodes - episode_reward: 165.196 [165.196, 165.196] - loss: 8.731 - mae: 44.963 - mean_q: 55.861 Interval 14 (6500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1603 Interval 15 (7000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.1717 1 episodes - episode_reward: 41.226 [41.226, 41.226] - loss: 11.351 - mae: 45.090 - mean_q: 55.869 Interval 16 (7500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4972 1 episodes - episode_reward: 275.803 [275.803, 275.803] - loss: 11.647 - mae: 44.905 - mean_q: 55.529 Interval 17 (8000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1794 Interval 18 (8500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5616 1 episodes - episode_reward: 192.994 [192.994, 192.994] - loss: 10.016 - mae: 45.248 - mean_q: 56.071 Interval 19 (9000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.9493 2 episodes - episode_reward: 255.379 [249.079, 261.679] - loss: 8.897 - mae: 44.853 - mean_q: 55.697 Interval 20 (9500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3296 1 episodes - episode_reward: 295.656 [295.656, 295.656] - loss: 11.862 - mae: 44.811 - mean_q: 55.754 Interval 21 (10000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4281 2 episodes - episode_reward: -182.233 [-232.484, -131.982] - loss: 12.906 - mae: 44.554 - 
mean_q: 54.941 Interval 22 (10500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4847 1 episodes - episode_reward: 260.541 [260.541, 260.541] - loss: 8.998 - mae: 44.583 - mean_q: 55.802 Interval 23 (11000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3420 1 episodes - episode_reward: 214.946 [214.946, 214.946] - loss: 10.976 - mae: 44.576 - mean_q: 55.571 Interval 24 (11500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0479 2 episodes - episode_reward: 40.730 [-107.222, 188.682] - loss: 9.264 - mae: 44.168 - mean_q: 55.099 Interval 25 (12000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2579 1 episodes - episode_reward: -240.250 [-240.250, -240.250] - loss: 9.897 - mae: 44.182 - mean_q: 54.845 Interval 26 (12500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1252 2 episodes - episode_reward: 72.830 [-60.437, 206.098] - loss: 13.996 - mae: 44.139 - mean_q: 55.139 Interval 27 (13000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3302 Interval 28 (13500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1103 4 episodes - episode_reward: -1.221 [-158.158, 270.296] - loss: 9.900 - mae: 44.514 - mean_q: 55.422 Interval 29 (14000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0108 Interval 30 (14500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0051 Interval 31 (15000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4105 1 episodes - episode_reward: 233.928 [233.928, 233.928] - loss: 12.592 - mae: 45.017 - mean_q: 55.782 Interval 32 (15500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3070 4 episodes - episode_reward: -14.099 [-244.101, 271.549] - loss: 10.544 - mae: 
45.012 - mean_q: 56.058 Interval 33 (16000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5980 2 episodes - episode_reward: -163.089 [-208.838, -117.339] - loss: 10.869 - mae: 45.210 - mean_q: 56.501 Interval 34 (16500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2138 2 episodes - episode_reward: 29.852 [-99.211, 158.915] - loss: 9.737 - mae: 45.258 - mean_q: 56.492 Interval 35 (17000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4377 2 episodes - episode_reward: 57.792 [28.698, 86.886] - loss: 9.753 - mae: 45.622 - mean_q: 57.123 Interval 36 (17500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5060 2 episodes - episode_reward: 155.434 [23.138, 287.730] - loss: 10.385 - mae: 45.327 - mean_q: 57.144 Interval 37 (18000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0598 3 episodes - episode_reward: 6.995 [-100.000, 213.985] - loss: 10.693 - mae: 45.616 - mean_q: 57.384 Interval 38 (18500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0409 Interval 39 (19000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2204 2 episodes - episode_reward: 65.200 [-100.000, 230.400] - loss: 10.174 - mae: 44.992 - mean_q: 56.620 Interval 40 (19500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3570 2 episodes - episode_reward: 132.977 [54.519, 211.436] - loss: 12.357 - mae: 44.704 - mean_q: 55.740 Interval 41 (20000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0903 2 episodes - episode_reward: -46.180 [-120.806, 28.445] - loss: 9.525 - mae: 45.213 - mean_q: 57.377 Interval 42 (20500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1045 3 episodes - episode_reward: -26.310 [-136.664, 71.385] - loss: 9.396 - mae: 45.100 
- mean_q: 57.510 Interval 43 (21000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1683 2 episodes - episode_reward: 6.116 [-222.255, 234.486] - loss: 11.682 - mae: 45.178 - mean_q: 57.339 Interval 44 (21500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2520 1 episodes - episode_reward: -31.557 [-31.557, -31.557] - loss: 11.276 - mae: 44.912 - mean_q: 56.870 Interval 45 (22000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5263 2 episodes - episode_reward: 234.492 [196.532, 272.453] - loss: 11.379 - mae: 45.560 - mean_q: 57.232 Interval 46 (22500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1901 1 episodes - episode_reward: -138.822 [-138.822, -138.822] - loss: 10.905 - mae: 45.248 - mean_q: 57.215 Interval 47 (23000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0837 3 episodes - episode_reward: -26.740 [-111.832, 131.612] - loss: 10.478 - mae: 45.015 - mean_q: 56.625 Interval 48 (23500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1640 2 episodes - episode_reward: 49.248 [-113.061, 211.557] - loss: 12.997 - mae: 44.551 - mean_q: 55.874 Interval 49 (24000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5465 2 episodes - episode_reward: 110.271 [-87.714, 308.255] - loss: 10.779 - mae: 44.930 - mean_q: 56.491 Interval 50 (24500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0966 2 episodes - episode_reward: -5.274 [-199.358, 188.810] - loss: 17.993 - mae: 44.760 - mean_q: 55.634 Interval 51 (25000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4709 1 episodes - episode_reward: 244.985 [244.985, 244.985] - loss: 11.570 - mae: 44.501 - mean_q: 55.783 Interval 52 (25500 steps performed) 500/500 [==============================] - 3s 
6ms/step - reward: 0.2486 2 episodes - episode_reward: 33.877 [-121.064, 188.818] - loss: 11.623 - mae: 44.263 - mean_q: 54.574 Interval 53 (26000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6360 1 episodes - episode_reward: 249.090 [249.090, 249.090] - loss: 14.870 - mae: 44.793 - mean_q: 55.887 Interval 54 (26500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1402 3 episodes - episode_reward: 20.910 [-104.029, 264.985] - loss: 13.469 - mae: 44.789 - mean_q: 56.084 Interval 55 (27000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9624 4 episodes - episode_reward: -110.961 [-214.200, -44.721] - loss: 13.283 - mae: 44.187 - mean_q: 54.985 Interval 56 (27500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2876 1 episodes - episode_reward: -127.961 [-127.961, -127.961] - loss: 11.388 - mae: 44.107 - mean_q: 54.645 Interval 57 (28000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3604 2 episodes - episode_reward: 99.193 [-85.501, 283.886] - loss: 14.685 - mae: 44.284 - mean_q: 55.024 Interval 58 (28500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4946 1 episodes - episode_reward: 189.314 [189.314, 189.314] - loss: 11.657 - mae: 43.986 - mean_q: 55.019 Interval 59 (29000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0932 2 episodes - episode_reward: 25.434 [-165.842, 216.711] - loss: 10.620 - mae: 43.691 - mean_q: 54.246 Interval 60 (29500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2488 Interval 61 (30000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3138 1 episodes - episode_reward: 234.698 [234.698, 234.698] - loss: 12.112 - mae: 43.419 - mean_q: 53.848 Interval 62 (30500 steps performed) 500/500 [==============================] 
- 3s 6ms/step - reward: 0.6247 1 episodes - episode_reward: 211.111 [211.111, 211.111] - loss: 12.897 - mae: 43.340 - mean_q: 54.169 Interval 63 (31000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4054 1 episodes - episode_reward: 257.611 [257.611, 257.611] - loss: 10.363 - mae: 43.640 - mean_q: 54.589 Interval 64 (31500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0886 2 episodes - episode_reward: 44.645 [-126.412, 215.702] - loss: 15.546 - mae: 43.872 - mean_q: 54.749 Interval 65 (32000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3102 3 episodes - episode_reward: 23.103 [-118.343, 297.368] - loss: 11.227 - mae: 43.681 - mean_q: 54.865 Interval 66 (32500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8098 3 episodes - episode_reward: 156.972 [-97.389, 303.585] - loss: 10.580 - mae: 43.732 - mean_q: 55.387 Interval 67 (33000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0709 1 episodes - episode_reward: -62.659 [-62.659, -62.659] - loss: 11.208 - mae: 43.567 - mean_q: 54.726 Interval 68 (33500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0224 Interval 69 (34000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2010 1 episodes - episode_reward: 189.847 [189.847, 189.847] - loss: 11.564 - mae: 44.169 - mean_q: 55.701 Interval 70 (34500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4342 3 episodes - episode_reward: -117.023 [-199.239, -34.757] - loss: 13.846 - mae: 43.919 - mean_q: 55.587 Interval 71 (35000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0077 3 episodes - episode_reward: 20.498 [-106.260, 245.913] - loss: 10.822 - mae: 44.212 - mean_q: 55.527 Interval 72 (35500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.1734 Interval 73 (36000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2298 Interval 74 (36500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1311 Interval 75 (37000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1645 Interval 76 (37500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1686 Interval 77 (38000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1764 Interval 78 (38500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1758 Interval 79 (39000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2784 Interval 80 (39500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2307 Interval 81 (40000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0814 3 episodes - episode_reward: -264.685 [-867.692, 204.965] - loss: 12.512 - mae: 42.370 - mean_q: 53.743 Interval 82 (40500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6432 1 episodes - episode_reward: 216.946 [216.946, 216.946] - loss: 12.516 - mae: 42.598 - mean_q: 53.583 Interval 83 (41000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6415 1 episodes - episode_reward: 291.922 [291.922, 291.922] - loss: 9.656 - mae: 43.019 - mean_q: 54.395 Interval 84 (41500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0422 2 episodes - episode_reward: 33.774 [-138.925, 206.473] - loss: 12.056 - mae: 42.656 - mean_q: 53.424 Interval 85 (42000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1744 Interval 86 (42500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 
0.4614 1 episodes - episode_reward: 123.207 [123.207, 123.207] - loss: 10.213 - mae: 42.972 - mean_q: 54.207 Interval 87 (43000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1105 1 episodes - episode_reward: -54.716 [-54.716, -54.716] - loss: 12.755 - mae: 43.238 - mean_q: 54.389 Interval 88 (43500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1513 3 episodes - episode_reward: 15.299 [-115.690, 243.903] - loss: 13.138 - mae: 43.845 - mean_q: 55.359 Interval 89 (44000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0844 1 episodes - episode_reward: -109.335 [-109.335, -109.335] - loss: 9.674 - mae: 43.801 - mean_q: 55.177 Interval 90 (44500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2368 2 episodes - episode_reward: 112.797 [-50.754, 276.349] - loss: 12.698 - mae: 44.028 - mean_q: 55.817 Interval 91 (45000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1734 1 episodes - episode_reward: -24.197 [-24.197, -24.197] - loss: 11.261 - mae: 44.448 - mean_q: 56.313 Interval 92 (45500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2874 3 episodes - episode_reward: 54.449 [-60.299, 258.742] - loss: 13.325 - mae: 44.522 - mean_q: 56.457 Interval 93 (46000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2059 2 episodes - episode_reward: 37.338 [-158.481, 233.156] - loss: 11.613 - mae: 44.681 - mean_q: 57.115 Interval 94 (46500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1222 Interval 95 (47000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3500 1 episodes - episode_reward: 176.297 [176.297, 176.297] - loss: 10.011 - mae: 44.697 - mean_q: 56.887 Interval 96 (47500 steps performed) 500/500 [==============================] - 3s 7ms/step - 
reward: 0.0352 2 episodes - episode_reward: 28.359 [-115.674, 172.393] - loss: 14.642 - mae: 44.280 - mean_q: 56.352 Interval 97 (48000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3569 1 episodes - episode_reward: -131.790 [-131.790, -131.790] - loss: 10.064 - mae: 44.181 - mean_q: 56.099 Interval 98 (48500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3059 1 episodes - episode_reward: 124.267 [124.267, 124.267] - loss: 12.843 - mae: 44.435 - mean_q: 56.683 Interval 99 (49000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0778 3 episodes - episode_reward: -18.032 [-125.451, 189.334] - loss: 12.424 - mae: 44.127 - mean_q: 56.251 Interval 100 (49500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3192 1 episodes - episode_reward: 10.765 [10.765, 10.765] - loss: 10.909 - mae: 44.383 - mean_q: 56.464 Interval 101 (50000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2392 2 episodes - episode_reward: -8.667 [-298.885, 281.551] - loss: 11.033 - mae: 44.475 - mean_q: 56.317 Interval 102 (50500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4157 3 episodes - episode_reward: 100.454 [-100.000, 214.682] - loss: 10.619 - mae: 44.716 - mean_q: 56.869 Interval 103 (51000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3723 7 episodes - episode_reward: -104.087 [-170.553, -7.292] - loss: 10.918 - mae: 44.265 - mean_q: 56.252 Interval 104 (51500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1586 Interval 105 (52000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1308 Interval 106 (52500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3020 3 episodes - episode_reward: -202.719 [-347.039, -127.422] - loss: 
11.800 - mae: 44.207 - mean_q: 56.316 Interval 107 (53000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4779 2 episodes - episode_reward: -96.851 [-100.594, -93.108] - loss: 12.310 - mae: 44.346 - mean_q: 55.745 Interval 108 (53500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1623 1 episodes - episode_reward: -78.939 [-78.939, -78.939] - loss: 10.350 - mae: 43.937 - mean_q: 55.289 Interval 109 (54000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0717 1 episodes - episode_reward: 48.765 [48.765, 48.765] - loss: 13.082 - mae: 44.106 - mean_q: 55.554 Interval 110 (54500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5044 1 episodes - episode_reward: 118.828 [118.828, 118.828] - loss: 12.328 - mae: 44.848 - mean_q: 55.882 Interval 111 (55000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6297 1 episodes - episode_reward: 292.548 [292.548, 292.548] - loss: 13.257 - mae: 44.881 - mean_q: 55.808 Interval 112 (55500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3661 1 episodes - episode_reward: 280.240 [280.240, 280.240] - loss: 10.507 - mae: 44.406 - mean_q: 55.290 Interval 113 (56000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0093 3 episodes - episode_reward: -132.147 [-338.102, -6.470] - loss: 11.100 - mae: 44.789 - mean_q: 55.971 Interval 114 (56500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1472 Interval 115 (57000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2643 2 episodes - episode_reward: 80.914 [12.179, 149.650] - loss: 17.686 - mae: 44.755 - mean_q: 56.288 Interval 116 (57500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3769 1 episodes - episode_reward: 226.612 [226.612, 226.612] 
- loss: 11.614 - mae: 44.863 - mean_q: 56.405 Interval 117 (58000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3518 2 episodes - episode_reward: -132.050 [-198.092, -66.007] - loss: 13.919 - mae: 44.994 - mean_q: 56.013 Interval 118 (58500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4836 1 episodes - episode_reward: 249.574 [249.574, 249.574] - loss: 11.925 - mae: 44.610 - mean_q: 56.067 Interval 119 (59000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6690 1 episodes - episode_reward: 219.948 [219.948, 219.948] - loss: 16.562 - mae: 44.398 - mean_q: 55.390 Interval 120 (59500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0740 Interval 121 (60000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1106 2 episodes - episode_reward: 70.547 [-84.430, 225.524] - loss: 11.694 - mae: 44.165 - mean_q: 55.255 Interval 122 (60500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3409 1 episodes - episode_reward: 207.500 [207.500, 207.500] - loss: 13.164 - mae: 44.167 - mean_q: 55.400 Interval 123 (61000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0456 2 episodes - episode_reward: 0.963 [-100.000, 101.926] - loss: 13.571 - mae: 44.338 - mean_q: 55.335 Interval 124 (61500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1387 1 episodes - episode_reward: -159.277 [-159.277, -159.277] - loss: 11.095 - mae: 44.380 - mean_q: 55.290 Interval 125 (62000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1914 1 episodes - episode_reward: 314.088 [314.088, 314.088] - loss: 12.697 - mae: 44.227 - mean_q: 54.975 Interval 126 (62500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5585 1 episodes - episode_reward: 198.374 
[198.374, 198.374] - loss: 9.732 - mae: 43.903 - mean_q: 54.665 Interval 127 (63000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3694 1 episodes - episode_reward: 273.986 [273.986, 273.986] - loss: 10.732 - mae: 43.765 - mean_q: 53.883 Interval 128 (63500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2857 2 episodes - episode_reward: -52.865 [-155.465, 49.735] - loss: 11.480 - mae: 43.933 - mean_q: 54.145 Interval 129 (64000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5181 1 episodes - episode_reward: 231.423 [231.423, 231.423] - loss: 13.067 - mae: 43.994 - mean_q: 54.388 Interval 130 (64500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3090 Interval 131 (65000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2202 1 episodes - episode_reward: 215.489 [215.489, 215.489] - loss: 12.274 - mae: 43.920 - mean_q: 54.732 Interval 132 (65500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0273 Interval 133 (66000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0263 Interval 134 (66500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1602 1 episodes - episode_reward: 118.277 [118.277, 118.277] - loss: 17.400 - mae: 44.055 - mean_q: 55.052 Interval 135 (67000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0983 Interval 136 (67500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0137 Interval 137 (68000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0802 Interval 138 (68500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4613 1 episodes - episode_reward: 114.358 [114.358, 114.358] - loss: 12.527 - mae: 43.353 - mean_q: 54.844 Interval 
139 (69000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1965 1 episodes - episode_reward: 233.930 [233.930, 233.930] - loss: 9.865 - mae: 43.251 - mean_q: 54.505 Interval 140 (69500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1030 Interval 141 (70000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4340 1 episodes - episode_reward: 156.775 [156.775, 156.775] - loss: 14.110 - mae: 43.580 - mean_q: 55.466 Interval 142 (70500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5187 1 episodes - episode_reward: 192.605 [192.605, 192.605] - loss: 10.976 - mae: 43.463 - mean_q: 55.930 Interval 143 (71000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2917 1 episodes - episode_reward: 261.355 [261.355, 261.355] - loss: 15.993 - mae: 43.659 - mean_q: 55.534 Interval 144 (71500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5489 1 episodes - episode_reward: 171.819 [171.819, 171.819] - loss: 10.836 - mae: 43.722 - mean_q: 55.438 Interval 145 (72000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5662 2 episodes - episode_reward: 227.534 [206.595, 248.474] - loss: 10.047 - mae: 43.820 - mean_q: 56.019 Interval 146 (72500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5815 1 episodes - episode_reward: 194.455 [194.455, 194.455] - loss: 9.506 - mae: 43.692 - mean_q: 55.722 Interval 147 (73000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3709 1 episodes - episode_reward: 276.112 [276.112, 276.112] - loss: 9.308 - mae: 44.096 - mean_q: 55.853 Interval 148 (73500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6033 1 episodes - episode_reward: 175.183 [175.183, 175.183] - loss: 8.063 - mae: 44.075 - mean_q: 56.428 
Interval 149 (74000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1503 Interval 150 (74500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3220 2 episodes - episode_reward: 3.409 [-109.435, 116.253] - loss: 9.845 - mae: 43.829 - mean_q: 56.230 Interval 151 (75000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5799 2 episodes - episode_reward: 230.629 [173.626, 287.631] - loss: 10.008 - mae: 44.413 - mean_q: 57.050 Interval 152 (75500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1264 3 episodes - episode_reward: -21.536 [-130.559, 161.856] - loss: 11.040 - mae: 44.525 - mean_q: 57.257 Interval 153 (76000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4811 1 episodes - episode_reward: 228.758 [228.758, 228.758] - loss: 9.940 - mae: 44.740 - mean_q: 57.488 Interval 154 (76500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1041 Interval 155 (77000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7082 2 episodes - episode_reward: -132.300 [-140.062, -124.538] - loss: 15.111 - mae: 44.278 - mean_q: 57.327 Interval 156 (77500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0456 Interval 157 (78000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3710 1 episodes - episode_reward: 103.281 [103.281, 103.281] - loss: 9.539 - mae: 45.276 - mean_q: 58.548 Interval 158 (78500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1803 Interval 159 (79000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0333 Interval 160 (79500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2697 2 episodes - episode_reward: 1.667 [-117.894, 121.228] - loss: 9.363 - 
mae: 45.343 - mean_q: 58.833 Interval 161 (80000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5066 1 episodes - episode_reward: 218.462 [218.462, 218.462] - loss: 7.150 - mae: 45.745 - mean_q: 59.520 Interval 162 (80500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1617 1 episodes - episode_reward: 246.835 [246.835, 246.835] - loss: 10.875 - mae: 45.765 - mean_q: 59.692 Interval 163 (81000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5009 3 episodes - episode_reward: -121.454 [-282.877, 27.846] - loss: 11.392 - mae: 45.768 - mean_q: 59.517 Interval 164 (81500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7276 3 episodes - episode_reward: 123.065 [-113.501, 275.013] - loss: 14.406 - mae: 46.002 - mean_q: 59.573 Interval 165 (82000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9977 2 episodes - episode_reward: 293.649 [276.528, 310.770] - loss: 10.690 - mae: 45.809 - mean_q: 59.700 Interval 166 (82500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3804 1 episodes - episode_reward: 173.982 [173.982, 173.982] - loss: 11.675 - mae: 45.794 - mean_q: 59.378 Interval 167 (83000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4465 1 episodes - episode_reward: 198.075 [198.075, 198.075] - loss: 9.351 - mae: 45.492 - mean_q: 59.086 Interval 168 (83500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3226 Interval 169 (84000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2085 1 episodes - episode_reward: 300.594 [300.594, 300.594] - loss: 12.662 - mae: 46.476 - mean_q: 60.082 Interval 170 (84500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3631 1 episodes - episode_reward: 66.751 [66.751, 66.751] - loss: 
9.213 - mae: 46.593 - mean_q: 60.271 Interval 171 (85000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5124 3 episodes - episode_reward: -214.173 [-511.844, 216.978] - loss: 9.974 - mae: 46.432 - mean_q: 60.346 Interval 172 (85500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1973 Interval 173 (86000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4031 1 episodes - episode_reward: 215.074 [215.074, 215.074] - loss: 12.722 - mae: 46.297 - mean_q: 60.265 Interval 174 (86500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0381 2 episodes - episode_reward: 10.377 [-198.977, 219.731] - loss: 10.935 - mae: 46.124 - mean_q: 59.679 Interval 175 (87000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3565 1 episodes - episode_reward: -149.734 [-149.734, -149.734] - loss: 10.120 - mae: 46.145 - mean_q: 59.460 Interval 176 (87500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0585 1 episodes - episode_reward: -68.477 [-68.477, -68.477] - loss: 12.141 - mae: 45.651 - mean_q: 59.594 Interval 177 (88000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0764 2 episodes - episode_reward: 55.257 [-130.447, 240.960] - loss: 12.243 - mae: 45.879 - mean_q: 59.567 Interval 178 (88500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2084 3 episodes - episode_reward: -18.322 [-162.783, 218.642] - loss: 12.731 - mae: 45.680 - mean_q: 59.500 Interval 179 (89000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8491 1 episodes - episode_reward: 260.797 [260.797, 260.797] - loss: 10.179 - mae: 46.269 - mean_q: 59.695 Interval 180 (89500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4308 1 episodes - episode_reward: 258.943 [258.943, 
258.943] - loss: 11.160 - mae: 46.276 - mean_q: 59.888 Interval 181 (90000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3988 2 episodes - episode_reward: -92.774 [-428.990, 243.443] - loss: 11.173 - mae: 46.255 - mean_q: 59.634 Interval 182 (90500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3508 2 episodes - episode_reward: 48.135 [-114.231, 210.501] - loss: 9.050 - mae: 46.200 - mean_q: 59.697 Interval 183 (91000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0636 1 episodes - episode_reward: 277.293 [277.293, 277.293] - loss: 15.289 - mae: 45.583 - mean_q: 58.757 Interval 184 (91500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2025 2 episodes - episode_reward: -1.167 [-242.916, 240.582] - loss: 11.609 - mae: 45.893 - mean_q: 59.283 Interval 185 (92000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0731 2 episodes - episode_reward: 33.608 [-207.977, 275.193] - loss: 8.155 - mae: 45.909 - mean_q: 59.324 Interval 186 (92500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8179 1 episodes - episode_reward: 273.026 [273.026, 273.026] - loss: 9.673 - mae: 46.089 - mean_q: 59.430 Interval 187 (93000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5063 1 episodes - episode_reward: 246.188 [246.188, 246.188] - loss: 10.443 - mae: 46.403 - mean_q: 60.116 Interval 188 (93500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5468 1 episodes - episode_reward: 257.062 [257.062, 257.062] - loss: 8.794 - mae: 46.267 - mean_q: 59.716 Interval 189 (94000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6303 1 episodes - episode_reward: 270.560 [270.560, 270.560] - loss: 10.654 - mae: 46.240 - mean_q: 59.957 Interval 190 (94500 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: 0.5567 1 episodes - episode_reward: 311.132 [311.132, 311.132] - loss: 11.154 - mae: 46.469 - mean_q: 59.590 Interval 191 (95000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7869 2 episodes - episode_reward: 269.942 [260.452, 279.432] - loss: 9.503 - mae: 46.306 - mean_q: 59.376 Interval 192 (95500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5209 1 episodes - episode_reward: 282.631 [282.631, 282.631] - loss: 9.287 - mae: 46.825 - mean_q: 60.020 Interval 193 (96000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0571 2 episodes - episode_reward: -66.344 [-381.491, 248.802] - loss: 14.368 - mae: 46.881 - mean_q: 59.860 Interval 194 (96500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3115 2 episodes - episode_reward: 140.611 [49.297, 231.925] - loss: 10.351 - mae: 47.471 - mean_q: 60.891 Interval 195 (97000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5285 1 episodes - episode_reward: 223.386 [223.386, 223.386] - loss: 13.414 - mae: 47.630 - mean_q: 60.919 Interval 196 (97500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1706 Interval 197 (98000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1081 Interval 198 (98500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4862 1 episodes - episode_reward: 68.997 [68.997, 68.997] - loss: 12.060 - mae: 47.956 - mean_q: 61.366 Interval 199 (99000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1236 Interval 200 (99500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1866 2 episodes - episode_reward: 40.894 [-148.596, 230.383] - loss: 11.757 - mae: 47.967 - mean_q: 61.694 Interval 201 (100000 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7981 2 episodes - episode_reward: 250.616 [239.803, 261.428] - loss: 10.496 - mae: 47.927 - mean_q: 61.908 Interval 202 (100500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 1.1779 2 episodes - episode_reward: 297.806 [268.372, 327.240] - loss: 14.726 - mae: 48.674 - mean_q: 62.525 Interval 203 (101000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5356 1 episodes - episode_reward: -251.386 [-251.386, -251.386] - loss: 10.466 - mae: 48.291 - mean_q: 62.421 Interval 204 (101500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5363 1 episodes - episode_reward: 206.778 [206.778, 206.778] - loss: 8.449 - mae: 48.939 - mean_q: 63.627 Interval 205 (102000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6465 1 episodes - episode_reward: 228.315 [228.315, 228.315] - loss: 8.826 - mae: 48.896 - mean_q: 63.170 Interval 206 (102500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0975 2 episodes - episode_reward: 35.240 [-201.574, 272.053] - loss: 8.767 - mae: 48.795 - mean_q: 63.318 Interval 207 (103000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1271 Interval 208 (103500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4553 2 episodes - episode_reward: 95.280 [-27.329, 217.889] - loss: 11.263 - mae: 48.996 - mean_q: 63.959 Interval 209 (104000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6031 2 episodes - episode_reward: 224.931 [171.528, 278.335] - loss: 10.339 - mae: 49.701 - mean_q: 64.101 Interval 210 (104500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4499 1 episodes - episode_reward: 174.336 [174.336, 174.336] - loss: 10.183 - mae: 49.591 - mean_q: 63.896 
Interval 211 (105000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0866 1 episodes - episode_reward: 14.173 [14.173, 14.173] - loss: 8.076 - mae: 49.718 - mean_q: 64.090 Interval 212 (105500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6430 2 episodes - episode_reward: 198.291 [181.793, 214.789] - loss: 17.104 - mae: 49.832 - mean_q: 63.806 Interval 213 (106000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3532 1 episodes - episode_reward: 192.797 [192.797, 192.797] - loss: 10.348 - mae: 49.749 - mean_q: 64.457 Interval 214 (106500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4819 2 episodes - episode_reward: -117.453 [-140.696, -94.209] - loss: 11.993 - mae: 49.619 - mean_q: 63.950 Interval 215 (107000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2826 2 episodes - episode_reward: 64.954 [-62.484, 192.392] - loss: 6.080 - mae: 48.762 - mean_q: 63.469 Interval 216 (107500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5023 1 episodes - episode_reward: 14.043 [14.043, 14.043] - loss: 6.828 - mae: 48.901 - mean_q: 63.432 Interval 217 (108000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3838 1 episodes - episode_reward: 336.898 [336.898, 336.898] - loss: 8.289 - mae: 48.646 - mean_q: 63.006 Interval 218 (108500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3943 1 episodes - episode_reward: 224.982 [224.982, 224.982] - loss: 9.563 - mae: 48.368 - mean_q: 63.087 Interval 219 (109000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0156 3 episodes - episode_reward: -3.380 [-125.795, 184.156] - loss: 8.125 - mae: 47.858 - mean_q: 62.568 Interval 220 (109500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: 0.3778 2 episodes - episode_reward: 122.416 [20.640, 224.192] - loss: 10.639 - mae: 47.587 - mean_q: 62.096 Interval 221 (110000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5248 1 episodes - episode_reward: 259.953 [259.953, 259.953] - loss: 9.183 - mae: 47.460 - mean_q: 61.622 Interval 222 (110500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4580 1 episodes - episode_reward: 229.169 [229.169, 229.169] - loss: 9.959 - mae: 46.846 - mean_q: 61.162 Interval 223 (111000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7341 3 episodes - episode_reward: -132.133 [-364.661, 190.629] - loss: 9.768 - mae: 46.909 - mean_q: 60.954 Interval 224 (111500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.9570 2 episodes - episode_reward: 251.681 [227.117, 276.244] - loss: 6.917 - mae: 46.543 - mean_q: 60.647 Interval 225 (112000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3618 2 episodes - episode_reward: 58.804 [-166.551, 284.159] - loss: 9.346 - mae: 46.831 - mean_q: 60.906 Interval 226 (112500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2722 3 episodes - episode_reward: -42.338 [-76.892, 18.430] - loss: 12.417 - mae: 46.748 - mean_q: 60.664 Interval 227 (113000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3449 1 episodes - episode_reward: 193.869 [193.869, 193.869] - loss: 12.236 - mae: 46.944 - mean_q: 60.705 Interval 228 (113500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6347 2 episodes - episode_reward: -119.425 [-216.322, -22.528] - loss: 8.304 - mae: 46.547 - mean_q: 60.262 Interval 229 (114000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3542 1 episodes - episode_reward: 159.196 [159.196, 159.196] - loss: 9.658 - mae: 
47.023 - mean_q: 60.358 Interval 230 (114500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2725 1 episodes - episode_reward: -49.593 [-49.593, -49.593] - loss: 9.485 - mae: 47.134 - mean_q: 60.705 Interval 231 (115000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4708 1 episodes - episode_reward: 274.731 [274.731, 274.731] - loss: 8.933 - mae: 47.628 - mean_q: 61.378 Interval 232 (115500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7352 2 episodes - episode_reward: 236.270 [236.057, 236.483] - loss: 10.745 - mae: 47.529 - mean_q: 61.044 Interval 233 (116000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3758 2 episodes - episode_reward: 113.913 [31.301, 196.526] - loss: 11.129 - mae: 47.478 - mean_q: 60.995 Interval 234 (116500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1617 Interval 235 (117000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0288 1 episodes - episode_reward: 170.346 [170.346, 170.346] - loss: 9.795 - mae: 47.803 - mean_q: 61.636 Interval 236 (117500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0790 1 episodes - episode_reward: -126.815 [-126.815, -126.815] - loss: 9.668 - mae: 47.998 - mean_q: 61.696 Interval 237 (118000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2894 1 episodes - episode_reward: 146.970 [146.970, 146.970] - loss: 9.842 - mae: 47.931 - mean_q: 62.067 Interval 238 (118500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3889 2 episodes - episode_reward: 67.964 [-108.830, 244.757] - loss: 9.819 - mae: 47.919 - mean_q: 61.923 Interval 239 (119000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0230 Interval 240 (119500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.2356 1 episodes - episode_reward: 233.006 [233.006, 233.006] - loss: 10.463 - mae: 47.542 - mean_q: 61.426 Interval 241 (120000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6061 3 episodes - episode_reward: 110.307 [-100.000, 272.224] - loss: 10.075 - mae: 47.369 - mean_q: 61.277 Interval 242 (120500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5725 1 episodes - episode_reward: 233.461 [233.461, 233.461] - loss: 12.983 - mae: 48.058 - mean_q: 62.068 Interval 243 (121000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1845 2 episodes - episode_reward: 54.441 [-144.890, 253.772] - loss: 8.637 - mae: 47.831 - mean_q: 61.956 Interval 244 (121500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0425 3 episodes - episode_reward: -48.523 [-234.914, 199.854] - loss: 10.372 - mae: 47.809 - mean_q: 61.904 Interval 245 (122000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3432 1 episodes - episode_reward: 291.332 [291.332, 291.332] - loss: 11.844 - mae: 47.971 - mean_q: 61.479 Interval 246 (122500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4531 1 episodes - episode_reward: 165.798 [165.798, 165.798] - loss: 12.151 - mae: 47.757 - mean_q: 61.609 Interval 247 (123000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4003 2 episodes - episode_reward: 187.694 [159.977, 215.411] - loss: 8.806 - mae: 47.453 - mean_q: 61.627 Interval 248 (123500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1509 3 episodes - episode_reward: -41.414 [-225.050, 241.536] - loss: 15.384 - mae: 47.922 - mean_q: 62.154 Interval 249 (124000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0885 3 episodes - 
episode_reward: -7.421 [-226.834, 224.857] - loss: 9.395 - mae: 48.182 - mean_q: 62.186 Interval 250 (124500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5477 1 episodes - episode_reward: 213.882 [213.882, 213.882] - loss: 9.341 - mae: 48.263 - mean_q: 62.888 Interval 251 (125000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1008 Interval 252 (125500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1975 1 episodes - episode_reward: 186.148 [186.148, 186.148] - loss: 11.803 - mae: 48.030 - mean_q: 62.360 Interval 253 (126000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5091 2 episodes - episode_reward: -198.820 [-298.437, -99.203] - loss: 8.305 - mae: 48.003 - mean_q: 62.063 Interval 254 (126500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4316 1 episodes - episode_reward: 232.684 [232.684, 232.684] - loss: 10.687 - mae: 48.283 - mean_q: 62.187 Interval 255 (127000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5364 1 episodes - episode_reward: 319.334 [319.334, 319.334] - loss: 10.641 - mae: 48.362 - mean_q: 63.075 Interval 256 (127500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2088 3 episodes - episode_reward: -19.337 [-128.947, 191.454] - loss: 6.909 - mae: 48.217 - mean_q: 62.537 Interval 257 (128000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1887 3 episodes - episode_reward: -13.859 [-154.701, 228.743] - loss: 8.902 - mae: 48.275 - mean_q: 62.479 Interval 258 (128500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1601 Interval 259 (129000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0350 Interval 260 (129500 steps performed) 500/500 [==============================] - 3s 7ms/step 
- reward: -0.0414 Interval 261 (130000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0474 Interval 262 (130500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0347 Interval 263 (131000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0308 Interval 264 (131500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0317 Interval 265 (132000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1769 2 episodes - episode_reward: -25.374 [-92.484, 41.735] - loss: 9.762 - mae: 46.057 - mean_q: 58.266 Interval 266 (132500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2357 1 episodes - episode_reward: 236.706 [236.706, 236.706] - loss: 9.346 - mae: 45.912 - mean_q: 58.419 Interval 267 (133000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5476 1 episodes - episode_reward: 218.077 [218.077, 218.077] - loss: 9.806 - mae: 45.949 - mean_q: 58.354 Interval 268 (133500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2951 1 episodes - episode_reward: 201.815 [201.815, 201.815] - loss: 11.151 - mae: 45.918 - mean_q: 58.421 Interval 269 (134000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1600 Interval 270 (134500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1133 2 episodes - episode_reward: 15.222 [-120.050, 150.495] - loss: 21.701 - mae: 45.746 - mean_q: 57.532 Interval 271 (135000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3213 1 episodes - episode_reward: 226.444 [226.444, 226.444] - loss: 7.014 - mae: 45.424 - mean_q: 57.403 Interval 272 (135500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3642 1 episodes - episode_reward: 133.890 [133.890, 
133.890] - loss: 8.086 - mae: 45.072 - mean_q: 57.013 Interval 273 (136000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2313 3 episodes - episode_reward: -21.105 [-144.561, 175.633] - loss: 7.245 - mae: 44.991 - mean_q: 56.250 Interval 274 (136500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4608 1 episodes - episode_reward: -335.170 [-335.170, -335.170] - loss: 7.004 - mae: 44.958 - mean_q: 55.900 Interval 275 (137000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8671 3 episodes - episode_reward: 182.289 [99.542, 236.279] - loss: 13.454 - mae: 44.958 - mean_q: 56.207 Interval 276 (137500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2655 2 episodes - episode_reward: 41.918 [-100.000, 183.837] - loss: 16.083 - mae: 44.951 - mean_q: 56.668 Interval 277 (138000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3343 3 episodes - episode_reward: 69.346 [-43.837, 282.038] - loss: 8.444 - mae: 45.060 - mean_q: 57.229 Interval 278 (138500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6596 3 episodes - episode_reward: -103.071 [-178.659, -62.464] - loss: 8.032 - mae: 45.001 - mean_q: 57.188 Interval 279 (139000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5356 1 episodes - episode_reward: 170.361 [170.361, 170.361] - loss: 8.506 - mae: 44.931 - mean_q: 56.770 Interval 280 (139500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3958 2 episodes - episode_reward: 146.486 [21.919, 271.053] - loss: 10.470 - mae: 44.756 - mean_q: 56.410 Interval 281 (140000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0272 2 episodes - episode_reward: -14.170 [-186.960, 158.619] - loss: 8.662 - mae: 44.605 - mean_q: 56.577 Interval 282 (140500 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0679 1 episodes - episode_reward: -140.294 [-140.294, -140.294] - loss: 9.607 - mae: 44.385 - mean_q: 56.274 Interval 283 (141000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4644 1 episodes - episode_reward: 277.213 [277.213, 277.213] - loss: 8.727 - mae: 44.638 - mean_q: 56.657 Interval 284 (141500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3344 2 episodes - episode_reward: 135.085 [38.538, 231.632] - loss: 9.556 - mae: 44.672 - mean_q: 56.918 Interval 285 (142000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7679 1 episodes - episode_reward: 258.974 [258.974, 258.974] - loss: 10.334 - mae: 44.686 - mean_q: 57.144 Interval 286 (142500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0383 2 episodes - episode_reward: 67.560 [-118.266, 253.386] - loss: 10.061 - mae: 44.508 - mean_q: 56.633 Interval 287 (143000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0208 3 episodes - episode_reward: -2.439 [-124.807, 197.901] - loss: 10.361 - mae: 43.899 - mean_q: 56.246 Interval 288 (143500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3355 2 episodes - episode_reward: 53.343 [-136.517, 243.202] - loss: 9.072 - mae: 43.969 - mean_q: 56.454 Interval 289 (144000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4416 1 episodes - episode_reward: 280.759 [280.759, 280.759] - loss: 12.037 - mae: 44.157 - mean_q: 56.112 Interval 290 (144500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1566 Interval 291 (145000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0288 Interval 292 (145500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1730 1 
episodes - episode_reward: 206.989 [206.989, 206.989] - loss: 10.598 - mae: 43.348 - mean_q: 55.355 Interval 293 (146000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1953 Interval 294 (146500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0750 1 episodes - episode_reward: 208.299 [208.299, 208.299] - loss: 8.671 - mae: 43.130 - mean_q: 55.736 Interval 295 (147000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4646 2 episodes - episode_reward: -225.495 [-226.810, -224.180] - loss: 13.770 - mae: 43.536 - mean_q: 56.001 Interval 296 (147500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2845 1 episodes - episode_reward: 247.847 [247.847, 247.847] - loss: 14.165 - mae: 43.304 - mean_q: 55.267 Interval 297 (148000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4628 1 episodes - episode_reward: 206.264 [206.264, 206.264] - loss: 9.156 - mae: 43.706 - mean_q: 56.029 Interval 298 (148500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3618 2 episodes - episode_reward: 34.761 [-88.038, 157.560] - loss: 8.124 - mae: 43.690 - mean_q: 55.827 Interval 299 (149000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3349 2 episodes - episode_reward: 153.179 [31.333, 275.025] - loss: 11.347 - mae: 43.806 - mean_q: 56.274 Interval 300 (149500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3387 1 episodes - episode_reward: 173.231 [173.231, 173.231] - loss: 9.060 - mae: 43.502 - mean_q: 55.684 Interval 301 (150000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5824 1 episodes - episode_reward: 260.959 [260.959, 260.959] - loss: 11.111 - mae: 43.982 - mean_q: 56.275 Interval 302 (150500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: 0.4786 1 episodes - episode_reward: 169.328 [169.328, 169.328] - loss: 9.649 - mae: 43.399 - mean_q: 55.824 Interval 303 (151000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3117 2 episodes - episode_reward: 65.087 [-100.000, 230.174] - loss: 10.818 - mae: 43.494 - mean_q: 55.668 Interval 304 (151500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0118 Interval 305 (152000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8899 6 episodes - episode_reward: -41.775 [-125.741, 250.677] - loss: 9.582 - mae: 43.224 - mean_q: 55.462 Interval 306 (152500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2266 2 episodes - episode_reward: 55.595 [-177.362, 288.553] - loss: 19.707 - mae: 43.251 - mean_q: 55.082 Interval 307 (153000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6463 1 episodes - episode_reward: 236.276 [236.276, 236.276] - loss: 9.113 - mae: 43.454 - mean_q: 55.530 Interval 308 (153500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2061 Interval 309 (154000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2086 1 episodes - episode_reward: 223.886 [223.886, 223.886] - loss: 8.139 - mae: 43.566 - mean_q: 55.151 Interval 310 (154500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0385 2 episodes - episode_reward: 9.591 [-238.203, 257.384] - loss: 14.265 - mae: 43.600 - mean_q: 55.753 Interval 311 (155000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7984 2 episodes - episode_reward: 207.472 [151.954, 262.989] - loss: 8.402 - mae: 43.321 - mean_q: 55.643 Interval 312 (155500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4669 1 episodes - episode_reward: 194.176 [194.176, 194.176] - loss: 10.150 
- mae: 43.988 - mean_q: 56.469 Interval 313 (156000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3753 1 episodes - episode_reward: 222.153 [222.153, 222.153] - loss: 10.901 - mae: 44.061 - mean_q: 56.199 Interval 314 (156500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6745 5 episodes - episode_reward: -61.927 [-102.469, 2.402] - loss: 8.582 - mae: 44.012 - mean_q: 56.118 Interval 315 (157000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1285 4 episodes - episode_reward: -5.969 [-144.779, 317.356] - loss: 10.097 - mae: 44.149 - mean_q: 55.856 Interval 316 (157500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0537 2 episodes - episode_reward: 25.263 [-152.078, 202.605] - loss: 11.832 - mae: 43.898 - mean_q: 56.075 Interval 317 (158000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0632 1 episodes - episode_reward: 69.315 [69.315, 69.315] - loss: 9.553 - mae: 44.147 - mean_q: 56.234 Interval 318 (158500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0441 1 episodes - episode_reward: -83.855 [-83.855, -83.855] - loss: 13.703 - mae: 44.071 - mean_q: 55.732 Interval 319 (159000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4259 1 episodes - episode_reward: 251.663 [251.663, 251.663] - loss: 9.897 - mae: 43.689 - mean_q: 54.734 Interval 320 (159500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3622 1 episodes - episode_reward: 172.545 [172.545, 172.545] - loss: 10.273 - mae: 43.358 - mean_q: 54.884 Interval 321 (160000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4286 1 episodes - episode_reward: 158.383 [158.383, 158.383] - loss: 10.496 - mae: 43.538 - mean_q: 54.583 Interval 322 (160500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.7209 2 episodes - episode_reward: 232.845 [213.293, 252.398] - loss: 10.430 - mae: 43.598 - mean_q: 54.771 Interval 323 (161000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4146 1 episodes - episode_reward: 208.118 [208.118, 208.118] - loss: 10.281 - mae: 43.923 - mean_q: 55.272 Interval 324 (161500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1480 2 episodes - episode_reward: -78.156 [-129.755, -26.558] - loss: 13.633 - mae: 43.426 - mean_q: 54.637 Interval 325 (162000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1497 3 episodes - episode_reward: -0.130 [-139.176, 263.656] - loss: 10.838 - mae: 43.568 - mean_q: 54.965 Interval 326 (162500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7978 1 episodes - episode_reward: 246.523 [246.523, 246.523] - loss: 14.381 - mae: 43.750 - mean_q: 55.692 Interval 327 (163000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4247 1 episodes - episode_reward: 261.467 [261.467, 261.467] - loss: 11.608 - mae: 43.589 - mean_q: 55.889 Interval 328 (163500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0267 Interval 329 (164000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4792 1 episodes - episode_reward: 195.948 [195.948, 195.948] - loss: 8.812 - mae: 43.947 - mean_q: 56.543 Interval 330 (164500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0397 2 episodes - episode_reward: 72.870 [-112.622, 258.362] - loss: 8.613 - mae: 43.831 - mean_q: 56.125 Interval 331 (165000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0223 Interval 332 (165500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6490 1 episodes - 
episode_reward: 164.386 [164.386, 164.386] - loss: 10.538 - mae: 43.579 - mean_q: 55.570 Interval 333 (166000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4146 3 episodes - episode_reward: 111.937 [-106.952, 265.753] - loss: 10.187 - mae: 43.531 - mean_q: 55.431 Interval 334 (166500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2531 2 episodes - episode_reward: 70.821 [-100.000, 241.641] - loss: 9.477 - mae: 43.745 - mean_q: 55.859 Interval 335 (167000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5252 1 episodes - episode_reward: 178.488 [178.488, 178.488] - loss: 9.782 - mae: 43.841 - mean_q: 55.506 Interval 336 (167500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1508 3 episodes - episode_reward: -1.722 [-169.608, 284.391] - loss: 10.831 - mae: 43.739 - mean_q: 55.856 Interval 337 (168000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1644 2 episodes - episode_reward: 65.202 [-101.313, 231.717] - loss: 9.235 - mae: 43.826 - mean_q: 56.246 Interval 338 (168500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5760 1 episodes - episode_reward: 194.762 [194.762, 194.762] - loss: 12.764 - mae: 43.855 - mean_q: 55.955 Interval 339 (169000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5778 1 episodes - episode_reward: 261.717 [261.717, 261.717] - loss: 9.653 - mae: 43.889 - mean_q: 56.153 Interval 340 (169500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5499 1 episodes - episode_reward: 230.859 [230.859, 230.859] - loss: 10.993 - mae: 44.292 - mean_q: 56.516 Interval 341 (170000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3326 1 episodes - episode_reward: 270.289 [270.289, 270.289] - loss: 10.771 - mae: 44.351 - mean_q: 56.988 
Interval 342 (170500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2413 Interval 343 (171000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2669 3 episodes - episode_reward: 93.610 [-25.270, 273.878] - loss: 8.531 - mae: 44.863 - mean_q: 57.682 Interval 344 (171500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1836 Interval 345 (172000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0109 Interval 346 (172500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0174 Interval 347 (173000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3189 1 episodes - episode_reward: 203.696 [203.696, 203.696] - loss: 9.942 - mae: 44.507 - mean_q: 57.350 Interval 348 (173500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5418 2 episodes - episode_reward: 109.925 [33.611, 186.240] - loss: 8.573 - mae: 44.287 - mean_q: 57.509 Interval 349 (174000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4074 1 episodes - episode_reward: 224.994 [224.994, 224.994] - loss: 10.937 - mae: 44.874 - mean_q: 58.207 Interval 350 (174500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3892 1 episodes - episode_reward: 175.316 [175.316, 175.316] - loss: 11.034 - mae: 44.911 - mean_q: 57.842 Interval 351 (175000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7322 2 episodes - episode_reward: 253.906 [240.739, 267.073] - loss: 8.270 - mae: 44.921 - mean_q: 57.922 Interval 352 (175500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8171 1 episodes - episode_reward: 184.981 [184.981, 184.981] - loss: 10.308 - mae: 45.189 - mean_q: 58.237 Interval 353 (176000 steps performed) 500/500 [==============================] - 
3s 6ms/step - reward: 0.9321 2 episodes - episode_reward: 296.829 [296.668, 296.991] - loss: 7.436 - mae: 45.303 - mean_q: 59.046 Interval 354 (176500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5004 2 episodes - episode_reward: 159.266 [29.366, 289.166] - loss: 9.704 - mae: 45.929 - mean_q: 59.718 Interval 355 (177000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7539 1 episodes - episode_reward: 224.077 [224.077, 224.077] - loss: 10.065 - mae: 45.810 - mean_q: 59.410 Interval 356 (177500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4818 1 episodes - episode_reward: 256.912 [256.912, 256.912] - loss: 9.476 - mae: 46.465 - mean_q: 60.664 Interval 357 (178000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2817 2 episodes - episode_reward: 115.984 [-5.771, 237.739] - loss: 10.333 - mae: 46.347 - mean_q: 60.621 Interval 358 (178500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.9031 1 episodes - episode_reward: 333.472 [333.472, 333.472] - loss: 9.286 - mae: 46.505 - mean_q: 60.723 Interval 359 (179000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3722 2 episodes - episode_reward: 150.840 [20.572, 281.109] - loss: 11.397 - mae: 46.691 - mean_q: 61.027 Interval 360 (179500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2940 1 episodes - episode_reward: 160.242 [160.242, 160.242] - loss: 14.535 - mae: 47.234 - mean_q: 61.375 Interval 361 (180000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6539 1 episodes - episode_reward: 205.966 [205.966, 205.966] - loss: 10.167 - mae: 47.173 - mean_q: 61.479 Interval 362 (180500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3881 1 episodes - episode_reward: 300.161 [300.161, 300.161] - loss: 
10.159 - mae: 47.475 - mean_q: 61.746 Interval 363 (181000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1298 2 episodes - episode_reward: 28.080 [-217.468, 273.627] - loss: 9.705 - mae: 47.841 - mean_q: 62.125 Interval 364 (181500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2303 Interval 365 (182000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4271 1 episodes - episode_reward: 273.747 [273.747, 273.747] - loss: 11.463 - mae: 48.492 - mean_q: 62.546 Interval 366 (182500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5472 1 episodes - episode_reward: 213.688 [213.688, 213.688] - loss: 7.704 - mae: 48.734 - mean_q: 62.942 Interval 367 (183000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1312 1 episodes - episode_reward: 277.284 [277.284, 277.284] - loss: 12.580 - mae: 48.306 - mean_q: 62.827 Interval 368 (183500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2423 1 episodes - episode_reward: -236.700 [-236.700, -236.700] - loss: 9.786 - mae: 48.468 - mean_q: 62.820 Interval 369 (184000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0733 1 episodes - episode_reward: -11.373 [-11.373, -11.373] - loss: 9.164 - mae: 48.322 - mean_q: 63.167 Interval 370 (184500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0444 Interval 371 (185000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4957 1 episodes - episode_reward: 187.169 [187.169, 187.169] - loss: 8.880 - mae: 48.035 - mean_q: 62.577 Interval 372 (185500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6841 3 episodes - episode_reward: 133.704 [-100.000, 265.688] - loss: 9.278 - mae: 48.088 - mean_q: 62.507 Interval 373 (186000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.1295 2 episodes - episode_reward: 82.699 [-108.665, 274.063] - loss: 11.253 - mae: 47.629 - mean_q: 61.690 Interval 374 (186500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1931 Interval 375 (187000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2485 2 episodes - episode_reward: -9.527 [-243.960, 224.906] - loss: 12.009 - mae: 47.759 - mean_q: 61.737 Interval 376 (187500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1336 Interval 377 (188000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2603 1 episodes - episode_reward: 48.929 [48.929, 48.929] - loss: 11.525 - mae: 46.588 - mean_q: 59.977 Interval 378 (188500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3784 Interval 379 (189000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6110 3 episodes - episode_reward: 150.082 [-100.000, 300.040] - loss: 8.802 - mae: 46.344 - mean_q: 59.710 Interval 380 (189500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1225 1 episodes - episode_reward: -48.875 [-48.875, -48.875] - loss: 10.594 - mae: 46.170 - mean_q: 59.404 Interval 381 (190000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4080 1 episodes - episode_reward: 213.178 [213.178, 213.178] - loss: 10.685 - mae: 46.102 - mean_q: 59.736 Interval 382 (190500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0274 Interval 383 (191000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0260 Interval 384 (191500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0549 Interval 385 (192000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 
-1.1751e-04 Interval 386 (192500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0183 Interval 387 (193000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5184 1 episodes - episode_reward: 98.104 [98.104, 98.104] - loss: 5.812 - mae: 43.697 - mean_q: 56.735 Interval 388 (193500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0608 1 episodes - episode_reward: 257.845 [257.845, 257.845] - loss: 9.189 - mae: 43.209 - mean_q: 56.020 Interval 389 (194000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4864 3 episodes - episode_reward: -128.826 [-221.392, -39.656] - loss: 7.191 - mae: 43.025 - mean_q: 55.566 Interval 390 (194500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2849 1 episodes - episode_reward: 154.720 [154.720, 154.720] - loss: 10.097 - mae: 42.881 - mean_q: 55.228 Interval 391 (195000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2341 1 episodes - episode_reward: 138.570 [138.570, 138.570] - loss: 8.310 - mae: 43.529 - mean_q: 56.264 Interval 392 (195500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6071 1 episodes - episode_reward: 233.567 [233.567, 233.567] - loss: 7.463 - mae: 43.595 - mean_q: 55.967 Interval 393 (196000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0247 Interval 394 (196500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0415 2 episodes - episode_reward: 46.861 [-118.696, 212.417] - loss: 10.530 - mae: 43.461 - mean_q: 55.892 Interval 395 (197000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1201 Interval 396 (197500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1123 3 episodes - episode_reward: 52.954 [-20.098, 183.531] - loss: 
8.887 - mae: 43.144 - mean_q: 56.123 Interval 397 (198000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2999 2 episodes - episode_reward: 46.783 [-105.423, 198.990] - loss: 8.734 - mae: 42.820 - mean_q: 55.803 Interval 398 (198500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4843 2 episodes - episode_reward: 131.187 [57.783, 204.592] - loss: 9.741 - mae: 43.139 - mean_q: 56.092 Interval 399 (199000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4609 3 episodes - episode_reward: -92.777 [-156.170, -21.372] - loss: 7.829 - mae: 43.391 - mean_q: 56.660 Interval 400 (199500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0881 2 episodes - episode_reward: -7.144 [-7.857, -6.431] - loss: 12.654 - mae: 43.597 - mean_q: 56.646 Interval 401 (200000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0097 Interval 402 (200500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.8044 7 episodes - episode_reward: -92.286 [-251.058, 123.471] - loss: 8.974 - mae: 42.512 - mean_q: 55.092 Interval 403 (201000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2621 4 episodes - episode_reward: -89.797 [-408.038, 265.924] - loss: 18.082 - mae: 42.853 - mean_q: 54.779 Interval 404 (201500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4313 1 episodes - episode_reward: 207.173 [207.173, 207.173] - loss: 10.561 - mae: 42.877 - mean_q: 54.759 Interval 405 (202000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0979 1 episodes - episode_reward: -9.581 [-9.581, -9.581] - loss: 11.931 - mae: 42.763 - mean_q: 54.862 Interval 406 (202500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4760 1 episodes - episode_reward: 225.012 [225.012, 
225.012] - loss: 9.797 - mae: 42.643 - mean_q: 55.077 Interval 407 (203000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3016 2 episodes - episode_reward: 124.637 [-9.152, 258.426] - loss: 15.684 - mae: 42.623 - mean_q: 54.606 Interval 408 (203500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1696 Interval 409 (204000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1676 3 episodes - episode_reward: -51.261 [-188.700, 157.022] - loss: 12.275 - mae: 42.789 - mean_q: 54.941 Interval 410 (204500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4761 3 episodes - episode_reward: 130.554 [-98.600, 252.297] - loss: 11.487 - mae: 42.701 - mean_q: 54.956 Interval 411 (205000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2234 Interval 412 (205500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2226 2 episodes - episode_reward: 44.220 [-122.554, 210.995] - loss: 10.886 - mae: 42.336 - mean_q: 54.242 Interval 413 (206000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4941 1 episodes - episode_reward: 220.399 [220.399, 220.399] - loss: 9.685 - mae: 42.066 - mean_q: 53.935 Interval 414 (206500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3975 1 episodes - episode_reward: 245.214 [245.214, 245.214] - loss: 10.913 - mae: 41.950 - mean_q: 54.053 Interval 415 (207000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1565 3 episodes - episode_reward: 58.281 [-34.039, 197.663] - loss: 9.055 - mae: 42.194 - mean_q: 54.670 Interval 416 (207500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2385 Interval 417 (208000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0771 2 episodes - 
episode_reward: 61.573 [-107.266, 230.412] - loss: 9.489 - mae: 42.001 - mean_q: 54.153 Interval 418 (208500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3082 1 episodes - episode_reward: 210.612 [210.612, 210.612] - loss: 8.030 - mae: 42.108 - mean_q: 54.443 Interval 419 (209000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3070 1 episodes - episode_reward: 135.987 [135.987, 135.987] - loss: 9.505 - mae: 42.343 - mean_q: 55.309 Interval 420 (209500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2600 1 episodes - episode_reward: -24.644 [-24.644, -24.644] - loss: 8.791 - mae: 42.134 - mean_q: 55.171 Interval 421 (210000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5854 1 episodes - episode_reward: 293.131 [293.131, 293.131] - loss: 9.060 - mae: 42.181 - mean_q: 54.956 Interval 422 (210500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0369 Interval 423 (211000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3125 2 episodes - episode_reward: 63.684 [-102.582, 229.950] - loss: 9.540 - mae: 42.101 - mean_q: 55.218 Interval 424 (211500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1971 2 episodes - episode_reward: 101.441 [-66.112, 268.994] - loss: 10.623 - mae: 41.978 - mean_q: 55.249 Interval 425 (212000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0207 1 episodes - episode_reward: -23.601 [-23.601, -23.601] - loss: 15.976 - mae: 42.206 - mean_q: 55.325 Interval 426 (212500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0276 Interval 427 (213000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0265 Interval 428 (213500 steps performed) 500/500 [==============================] - 5s 9ms/step - 
reward: -0.0219 Interval 429 (214000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2680 1 episodes - episode_reward: 131.696 [131.696, 131.696] - loss: 10.004 - mae: 41.409 - mean_q: 54.319 Interval 430 (214500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0204 Interval 431 (215000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2618 1 episodes - episode_reward: 144.446 [144.446, 144.446] - loss: 11.353 - mae: 41.206 - mean_q: 53.949 Interval 432 (215500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7724 3 episodes - episode_reward: -105.673 [-182.777, -34.244] - loss: 12.054 - mae: 41.053 - mean_q: 53.906 Interval 433 (216000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0892 1 episodes - episode_reward: -172.343 [-172.343, -172.343] - loss: 9.181 - mae: 40.586 - mean_q: 53.611 Interval 434 (216500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3966 3 episodes - episode_reward: 113.698 [-126.335, 266.385] - loss: 9.394 - mae: 40.282 - mean_q: 52.826 Interval 435 (217000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8429 2 episodes - episode_reward: -445.552 [-614.053, -277.051] - loss: 12.899 - mae: 40.448 - mean_q: 53.317 Interval 436 (217500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4819 4 episodes - episode_reward: -61.851 [-169.583, 3.412] - loss: 10.217 - mae: 40.504 - mean_q: 53.142 Interval 437 (218000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4592 3 episodes - episode_reward: 68.186 [-23.996, 233.084] - loss: 14.555 - mae: 40.696 - mean_q: 53.453 Interval 438 (218500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0120 1 episodes - episode_reward: -28.671 [-28.671, -28.671] - 
loss: 15.967 - mae: 40.547 - mean_q: 53.346 Interval 439 (219000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4601 1 episodes - episode_reward: 211.303 [211.303, 211.303] - loss: 6.067 - mae: 40.699 - mean_q: 53.393 Interval 440 (219500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1120 Interval 441 (220000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1161 4 episodes - episode_reward: -112.021 [-453.606, 218.058] - loss: 10.043 - mae: 41.224 - mean_q: 54.299 Interval 442 (220500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4007 2 episodes - episode_reward: -105.704 [-208.806, -2.602] - loss: 12.042 - mae: 41.122 - mean_q: 53.678 Interval 443 (221000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0244 1 episodes - episode_reward: -57.049 [-57.049, -57.049] - loss: 10.505 - mae: 41.163 - mean_q: 53.537 Interval 444 (221500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4517 1 episodes - episode_reward: 226.907 [226.907, 226.907] - loss: 15.530 - mae: 41.282 - mean_q: 53.701 Interval 445 (222000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3104 1 episodes - episode_reward: 200.023 [200.023, 200.023] - loss: 12.646 - mae: 41.087 - mean_q: 53.296 Interval 446 (222500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3014 Interval 447 (223000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0396 1 episodes - episode_reward: -158.835 [-158.835, -158.835] - loss: 11.075 - mae: 41.404 - mean_q: 53.636 Interval 448 (223500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4450 1 episodes - episode_reward: 226.029 [226.029, 226.029] - loss: 13.244 - mae: 41.464 - mean_q: 53.307 Interval 449 (224000 steps 
performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0427 1 episodes - episode_reward: -6.268 [-6.268, -6.268] - loss: 13.267 - mae: 41.295 - mean_q: 53.555 Interval 450 (224500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0802 2 episodes - episode_reward: -198.613 [-363.468, -33.759] - loss: 9.717 - mae: 41.290 - mean_q: 53.757 Interval 451 (225000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4308 2 episodes - episode_reward: -398.071 [-402.584, -393.558] - loss: 12.327 - mae: 40.844 - mean_q: 52.584 Interval 452 (225500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4756 2 episodes - episode_reward: -88.070 [-115.874, -60.266] - loss: 11.948 - mae: 40.855 - mean_q: 52.148 Interval 453 (226000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6326 1 episodes - episode_reward: 167.226 [167.226, 167.226] - loss: 17.085 - mae: 40.877 - mean_q: 51.208 Interval 454 (226500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1973 2 episodes - episode_reward: 111.400 [-38.070, 260.871] - loss: 14.266 - mae: 40.438 - mean_q: 51.927 Interval 455 (227000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1694 Interval 456 (227500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2335 3 episodes - episode_reward: 70.810 [-209.222, 240.675] - loss: 14.769 - mae: 39.994 - mean_q: 51.354 Interval 457 (228000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4867 2 episodes - episode_reward: 125.749 [-54.121, 305.619] - loss: 13.038 - mae: 39.842 - mean_q: 50.442 Interval 458 (228500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0228 1 episodes - episode_reward: -43.535 [-43.535, -43.535] - loss: 8.548 - mae: 39.694 - mean_q: 50.208 
Interval 459 (229000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0282 Interval 460 (229500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4953 3 episodes - episode_reward: -93.770 [-268.728, 88.009] - loss: 9.998 - mae: 38.933 - mean_q: 49.013 Interval 461 (230000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7251 1 episodes - episode_reward: -318.081 [-318.081, -318.081] - loss: 12.990 - mae: 39.262 - mean_q: 49.271 Interval 462 (230500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0023 Interval 463 (231000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2317 Interval 464 (231500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0741 2 episodes - episode_reward: -108.853 [-168.181, -49.525] - loss: 13.168 - mae: 39.104 - mean_q: 48.367 Interval 465 (232000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1782 2 episodes - episode_reward: 63.569 [-98.713, 225.852] - loss: 13.337 - mae: 38.647 - mean_q: 47.758 Interval 466 (232500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0383 Interval 467 (233000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2359 1 episodes - episode_reward: 203.315 [203.315, 203.315] - loss: 13.052 - mae: 38.549 - mean_q: 47.381 Interval 468 (233500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0137 Interval 469 (234000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0428 Interval 470 (234500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3259 1 episodes - episode_reward: 106.876 [106.876, 106.876] - loss: 12.830 - mae: 37.981 - mean_q: 47.224 Interval 471 (235000 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -0.5741 2 episodes - episode_reward: -143.809 [-184.357, -103.260] - loss: 11.405 - mae: 37.495 - mean_q: 46.590 Interval 472 (235500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5619 2 episodes - episode_reward: -86.311 [-104.252, -68.371] - loss: 10.365 - mae: 37.424 - mean_q: 46.330 Interval 473 (236000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7651 3 episodes - episode_reward: -150.986 [-272.330, -55.382] - loss: 14.386 - mae: 37.463 - mean_q: 46.445 Interval 474 (236500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1415 Interval 475 (237000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.1093 Interval 476 (237500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3621 1 episodes - episode_reward: 82.961 [82.961, 82.961] - loss: 8.884 - mae: 36.999 - mean_q: 45.605 Interval 477 (238000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2212 2 episodes - episode_reward: -70.043 [-100.000, -40.087] - loss: 11.443 - mae: 36.896 - mean_q: 45.414 Interval 478 (238500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1802 1 episodes - episode_reward: 230.327 [230.327, 230.327] - loss: 10.105 - mae: 36.781 - mean_q: 45.753 Interval 479 (239000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3167 1 episodes - episode_reward: 94.764 [94.764, 94.764] - loss: 9.440 - mae: 36.974 - mean_q: 45.402 Interval 480 (239500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0165 1 episodes - episode_reward: 135.829 [135.829, 135.829] - loss: 10.493 - mae: 37.157 - mean_q: 45.777 Interval 481 (240000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3616 1 episodes - 
episode_reward: -348.848 [-348.848, -348.848] - loss: 10.675 - mae: 37.274 - mean_q: 45.383 Interval 482 (240500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2684 1 episodes - episode_reward: 221.835 [221.835, 221.835] - loss: 10.676 - mae: 37.194 - mean_q: 45.871 Interval 483 (241000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1943 Interval 484 (241500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3402 1 episodes - episode_reward: 192.938 [192.938, 192.938] - loss: 7.925 - mae: 37.433 - mean_q: 46.861 Interval 485 (242000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3668 1 episodes - episode_reward: 170.377 [170.377, 170.377] - loss: 9.807 - mae: 37.766 - mean_q: 47.108 Interval 486 (242500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1302 Interval 487 (243000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3513 2 episodes - episode_reward: -100.721 [-374.548, 173.107] - loss: 11.156 - mae: 38.584 - mean_q: 47.909 Interval 488 (243500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4910 1 episodes - episode_reward: 256.530 [256.530, 256.530] - loss: 11.618 - mae: 38.898 - mean_q: 47.353 Interval 489 (244000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5039 1 episodes - episode_reward: -277.667 [-277.667, -277.667] - loss: 11.155 - mae: 38.884 - mean_q: 48.074 Interval 490 (244500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5139 1 episodes - episode_reward: 226.326 [226.326, 226.326] - loss: 10.810 - mae: 39.037 - mean_q: 48.006 Interval 491 (245000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7566 4 episodes - episode_reward: -76.524 [-257.496, 200.857] - loss: 10.015 - mae: 38.975 
- mean_q: 48.156 Interval 492 (245500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0210 2 episodes - episode_reward: 16.862 [-160.479, 194.203] - loss: 10.607 - mae: 38.775 - mean_q: 46.997 Interval 493 (246000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1655 Interval 494 (246500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0027 Interval 495 (247000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.0875 1 episodes - episode_reward: 159.843 [159.843, 159.843] - loss: 10.627 - mae: 38.755 - mean_q: 47.464 Interval 496 (247500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1313 3 episodes - episode_reward: -212.848 [-352.334, -33.540] - loss: 8.979 - mae: 38.633 - mean_q: 47.617 Interval 497 (248000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3774 1 episodes - episode_reward: 180.197 [180.197, 180.197] - loss: 8.892 - mae: 38.784 - mean_q: 47.658 Interval 498 (248500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4682 1 episodes - episode_reward: 234.918 [234.918, 234.918] - loss: 8.808 - mae: 39.005 - mean_q: 48.249 Interval 499 (249000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4440 1 episodes - episode_reward: 233.300 [233.300, 233.300] - loss: 10.438 - mae: 39.093 - mean_q: 48.033 Interval 500 (249500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2864 3 episodes - episode_reward: -198.754 [-278.259, -55.149] - loss: 12.135 - mae: 39.137 - mean_q: 47.806 Interval 501 (250000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5351 2 episodes - episode_reward: -158.362 [-229.255, -87.469] - loss: 11.285 - mae: 39.683 - mean_q: 48.719 Interval 502 (250500 steps performed) 500/500 
[==============================] - 5s 9ms/step - reward: 0.0146 Interval 503 (251000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3459 1 episodes - episode_reward: 132.439 [132.439, 132.439] - loss: 10.542 - mae: 39.676 - mean_q: 47.878 Interval 504 (251500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5630 2 episodes - episode_reward: -90.729 [-367.626, 186.168] - loss: 12.125 - mae: 39.719 - mean_q: 47.820 Interval 505 (252000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1061 Interval 506 (252500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0094 2 episodes - episode_reward: 27.013 [-76.985, 131.012] - loss: 10.436 - mae: 39.584 - mean_q: 47.739 Interval 507 (253000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6651 2 episodes - episode_reward: -151.265 [-194.623, -107.907] - loss: 10.674 - mae: 39.563 - mean_q: 47.397 Interval 508 (253500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0875 1 episodes - episode_reward: -214.819 [-214.819, -214.819] - loss: 11.710 - mae: 39.926 - mean_q: 47.332 Interval 509 (254000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3264 2 episodes - episode_reward: -815.520 [-1436.680, -194.360] - loss: 14.092 - mae: 39.839 - mean_q: 46.685 Interval 510 (254500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0087 2 episodes - episode_reward: 19.363 [-167.603, 206.328] - loss: 11.076 - mae: 39.509 - mean_q: 46.537 Interval 511 (255000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2947 1 episodes - episode_reward: 179.877 [179.877, 179.877] - loss: 11.755 - mae: 39.650 - mean_q: 46.991 Interval 512 (255500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6694 1 
episodes - episode_reward: 212.533 [212.533, 212.533] - loss: 11.419 - mae: 39.951 - mean_q: 46.983 Interval 513 (256000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5116 1 episodes - episode_reward: 296.499 [296.499, 296.499] - loss: 10.465 - mae: 40.358 - mean_q: 48.684 Interval 514 (256500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0687 Interval 515 (257000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0841 2 episodes - episode_reward: 77.514 [-100.000, 255.028] - loss: 12.155 - mae: 40.793 - mean_q: 48.851 Interval 516 (257500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4682 1 episodes - episode_reward: 180.611 [180.611, 180.611] - loss: 10.828 - mae: 40.725 - mean_q: 48.744 Interval 517 (258000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0047 Interval 518 (258500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3779 1 episodes - episode_reward: 172.063 [172.063, 172.063] - loss: 11.606 - mae: 40.527 - mean_q: 48.265 Interval 519 (259000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5721 1 episodes - episode_reward: 230.146 [230.146, 230.146] - loss: 9.487 - mae: 40.646 - mean_q: 49.018 Interval 520 (259500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3822 1 episodes - episode_reward: 239.895 [239.895, 239.895] - loss: 9.518 - mae: 40.319 - mean_q: 48.375 Interval 521 (260000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3440 1 episodes - episode_reward: 262.492 [262.492, 262.492] - loss: 10.286 - mae: 40.632 - mean_q: 49.157 Interval 522 (260500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1164 Interval 523 (261000 steps performed) 500/500 [==============================] - 4s 
7ms/step - reward: 0.1862 2 episodes - episode_reward: 79.262 [-32.622, 191.146] - loss: 15.939 - mae: 41.412 - mean_q: 49.409 Interval 524 (261500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3666 1 episodes - episode_reward: 179.796 [179.796, 179.796] - loss: 9.878 - mae: 41.230 - mean_q: 49.437 Interval 525 (262000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4295 1 episodes - episode_reward: 219.601 [219.601, 219.601] - loss: 10.597 - mae: 41.228 - mean_q: 49.459 Interval 526 (262500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9240 1 episodes - episode_reward: -367.021 [-367.021, -367.021] - loss: 11.994 - mae: 41.625 - mean_q: 50.183 Interval 527 (263000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4145 1 episodes - episode_reward: 124.227 [124.227, 124.227] - loss: 11.077 - mae: 41.944 - mean_q: 50.686 Interval 528 (263500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5186 2 episodes - episode_reward: -396.543 [-412.933, -380.153] - loss: 12.914 - mae: 42.324 - mean_q: 50.955 Interval 529 (264000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8922 3 episodes - episode_reward: -143.524 [-385.327, 224.597] - loss: 11.303 - mae: 42.293 - mean_q: 50.501 Interval 530 (264500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4313 1 episodes - episode_reward: 212.169 [212.169, 212.169] - loss: 10.485 - mae: 42.498 - mean_q: 50.543 Interval 531 (265000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6546 1 episodes - episode_reward: 218.457 [218.457, 218.457] - loss: 12.086 - mae: 42.506 - mean_q: 49.979 Interval 532 (265500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3339 1 episodes - episode_reward: 214.721 [214.721, 214.721] 
- loss: 11.585 - mae: 42.655 - mean_q: 51.148 Interval 533 (266000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2050 1 episodes - episode_reward: 208.857 [208.857, 208.857] - loss: 12.921 - mae: 42.876 - mean_q: 51.304 Interval 534 (266500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2688 2 episodes - episode_reward: -121.553 [-132.472, -110.633] - loss: 9.608 - mae: 43.004 - mean_q: 50.937 Interval 535 (267000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2250 Interval 536 (267500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0682 2 episodes - episode_reward: -19.498 [-218.474, 179.478] - loss: 11.283 - mae: 42.740 - mean_q: 51.528 Interval 537 (268000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4414 1 episodes - episode_reward: 238.076 [238.076, 238.076] - loss: 11.505 - mae: 42.553 - mean_q: 50.908 Interval 538 (268500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4091 4 episodes - episode_reward: -48.351 [-157.164, 188.959] - loss: 10.524 - mae: 42.707 - mean_q: 51.238 Interval 539 (269000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5561 5 episodes - episode_reward: -53.475 [-129.551, 212.779] - loss: 11.129 - mae: 42.704 - mean_q: 51.340 Interval 540 (269500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3508 1 episodes - episode_reward: -359.107 [-359.107, -359.107] - loss: 10.568 - mae: 42.657 - mean_q: 51.062 Interval 541 (270000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1648 1 episodes - episode_reward: 256.089 [256.089, 256.089] - loss: 12.504 - mae: 42.653 - mean_q: 50.741 Interval 542 (270500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3210 2 episodes - 
episode_reward: -109.825 [-111.873, -107.776] - loss: 13.108 - mae: 42.651 - mean_q: 50.912 Interval 543 (271000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0835 1 episodes - episode_reward: 171.406 [171.406, 171.406] - loss: 12.568 - mae: 42.543 - mean_q: 50.728 Interval 544 (271500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1467 1 episodes - episode_reward: -233.258 [-233.258, -233.258] - loss: 10.155 - mae: 42.662 - mean_q: 51.300 Interval 545 (272000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1251 1 episodes - episode_reward: 207.656 [207.656, 207.656] - loss: 12.978 - mae: 42.295 - mean_q: 50.781 Interval 546 (272500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1531 4 episodes - episode_reward: -192.155 [-281.550, -100.000] - loss: 11.740 - mae: 42.756 - mean_q: 51.056 Interval 547 (273000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3372 2 episodes - episode_reward: -87.476 [-100.000, -74.952] - loss: 10.626 - mae: 42.540 - mean_q: 50.754 Interval 548 (273500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0872 Interval 549 (274000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2518 1 episodes - episode_reward: 172.493 [172.493, 172.493] - loss: 13.001 - mae: 42.426 - mean_q: 50.564 Interval 550 (274500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9571 3 episodes - episode_reward: -164.733 [-263.037, -100.000] - loss: 13.231 - mae: 42.458 - mean_q: 50.199 Interval 551 (275000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2304 Interval 552 (275500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2771 1 episodes - episode_reward: 245.292 [245.292, 245.292] - loss: 10.244 - mae: 
42.227 - mean_q: 50.863 Interval 553 (276000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1732 Interval 554 (276500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4755 1 episodes - episode_reward: 238.668 [238.668, 238.668] - loss: 10.328 - mae: 42.552 - mean_q: 52.001 Interval 555 (277000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0831 2 episodes - episode_reward: 49.411 [-127.972, 226.794] - loss: 9.024 - mae: 42.640 - mean_q: 51.829 Interval 556 (277500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2844 2 episodes - episode_reward: -104.138 [-105.048, -103.228] - loss: 9.656 - mae: 42.996 - mean_q: 52.075 Interval 557 (278000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1070 Interval 558 (278500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0071 Interval 559 (279000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4411 1 episodes - episode_reward: 204.090 [204.090, 204.090] - loss: 8.611 - mae: 43.025 - mean_q: 52.210 Interval 560 (279500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3618 3 episodes - episode_reward: -40.202 [-236.650, 219.696] - loss: 7.559 - mae: 43.432 - mean_q: 53.227 Interval 561 (280000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7894 3 episodes - episode_reward: -27.286 [-176.295, 181.754] - loss: 9.458 - mae: 43.792 - mean_q: 53.381 Interval 562 (280500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6781 4 episodes - episode_reward: -149.536 [-369.160, 33.177] - loss: 10.016 - mae: 43.655 - mean_q: 53.283 Interval 563 (281000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1296 1 episodes - episode_reward: -120.430 
[-120.430, -120.430] - loss: 12.318 - mae: 43.810 - mean_q: 53.028 Interval 564 (281500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2696 2 episodes - episode_reward: 70.860 [-121.597, 263.317] - loss: 11.961 - mae: 44.007 - mean_q: 53.619 Interval 565 (282000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1867 2 episodes - episode_reward: -266.346 [-419.621, -113.072] - loss: 12.059 - mae: 44.240 - mean_q: 54.176 Interval 566 (282500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2575 Interval 567 (283000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0103 Interval 568 (283500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0199 Interval 569 (284000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0139 2 episodes - episode_reward: 79.838 [-47.144, 206.820] - loss: 10.996 - mae: 44.725 - mean_q: 54.666 Interval 570 (284500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6678 2 episodes - episode_reward: -201.681 [-374.249, -29.113] - loss: 12.476 - mae: 45.013 - mean_q: 55.386 Interval 571 (285000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1909 1 episodes - episode_reward: -153.764 [-153.764, -153.764] - loss: 11.530 - mae: 45.086 - mean_q: 55.624 Interval 572 (285500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2537 2 episodes - episode_reward: 107.930 [-6.891, 222.751] - loss: 10.670 - mae: 44.442 - mean_q: 54.822 Interval 573 (286000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5744 3 episodes - episode_reward: -96.425 [-214.482, -8.018] - loss: 14.542 - mae: 44.223 - mean_q: 54.503 Interval 574 (286500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 
0.2898 1 episodes - episode_reward: 30.827 [30.827, 30.827] - loss: 9.210 - mae: 44.388 - mean_q: 54.705 Interval 575 (287000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1104 Interval 576 (287500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4300e-04 Interval 577 (288000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2213 2 episodes - episode_reward: 34.825 [-191.037, 260.687] - loss: 12.399 - mae: 43.408 - mean_q: 54.006 Interval 578 (288500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3005 2 episodes - episode_reward: -131.936 [-148.222, -115.651] - loss: 10.198 - mae: 43.268 - mean_q: 53.421 Interval 579 (289000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4484 1 episodes - episode_reward: 198.510 [198.510, 198.510] - loss: 10.858 - mae: 43.179 - mean_q: 52.941 Interval 580 (289500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3665 3 episodes - episode_reward: 107.417 [-100.000, 224.197] - loss: 14.302 - mae: 43.118 - mean_q: 52.803 Interval 581 (290000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0880 1 episodes - episode_reward: -133.477 [-133.477, -133.477] - loss: 12.388 - mae: 42.637 - mean_q: 52.526 Interval 582 (290500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1503 Interval 583 (291000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1376 2 episodes - episode_reward: 15.279 [-68.037, 98.595] - loss: 11.938 - mae: 43.031 - mean_q: 52.843 Interval 584 (291500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.5314 1 episodes - episode_reward: 263.439 [263.439, 263.439] - loss: 12.560 - mae: 43.409 - mean_q: 53.462 Interval 585 (292000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -1.1920 3 episodes - episode_reward: -170.489 [-223.189, -108.474] - loss: 14.173 - mae: 43.425 - mean_q: 53.889 Interval 586 (292500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2008 2 episodes - episode_reward: -76.507 [-105.498, -47.517] - loss: 12.012 - mae: 43.450 - mean_q: 53.630 Interval 587 (293000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1478 1 episodes - episode_reward: 132.109 [132.109, 132.109] - loss: 16.213 - mae: 42.493 - mean_q: 52.530 Interval 588 (293500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1166 Interval 589 (294000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5667 3 episodes - episode_reward: -131.155 [-198.478, -97.206] - loss: 13.148 - mae: 42.656 - mean_q: 53.340 Interval 590 (294500 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -5.8747e-04 1 episodes - episode_reward: 137.931 [137.931, 137.931] - loss: 11.394 - mae: 42.858 - mean_q: 52.993 Interval 591 (295000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.6366 2 episodes - episode_reward: -239.057 [-306.457, -171.658] - loss: 13.121 - mae: 42.826 - mean_q: 53.086 Interval 592 (295500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4408 1 episodes - episode_reward: 243.676 [243.676, 243.676] - loss: 13.385 - mae: 42.695 - mean_q: 53.270 Interval 593 (296000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2748 1 episodes - episode_reward: -241.696 [-241.696, -241.696] - loss: 11.424 - mae: 42.832 - mean_q: 53.337 Interval 594 (296500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.5834 2 episodes - episode_reward: 167.075 [129.877, 204.272] - loss: 12.976 - mae: 43.075 - mean_q: 54.070 
Interval 595 (297000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6985 1 episodes - episode_reward: 207.680 [207.680, 207.680] - loss: 11.148 - mae: 43.500 - mean_q: 54.271 Interval 596 (297500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2606 1 episodes - episode_reward: 302.884 [302.884, 302.884] - loss: 9.160 - mae: 43.558 - mean_q: 55.023 Interval 597 (298000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0324 Interval 598 (298500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.5207 1 episodes - episode_reward: 167.835 [167.835, 167.835] - loss: 13.310 - mae: 44.116 - mean_q: 55.416 Interval 599 (299000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.8647 2 episodes - episode_reward: 259.901 [237.361, 282.441] - loss: 10.742 - mae: 44.043 - mean_q: 55.702 Interval 600 (299500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2481 Interval 601 (300000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1455 2 episodes - episode_reward: 62.904 [-150.260, 276.068] - loss: 11.968 - mae: 44.297 - mean_q: 56.445 Interval 602 (300500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.5554 1 episodes - episode_reward: 146.313 [146.313, 146.313] - loss: 10.662 - mae: 44.490 - mean_q: 57.058 Interval 603 (301000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0129 Interval 604 (301500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2468 1 episodes - episode_reward: 216.942 [216.942, 216.942] - loss: 11.506 - mae: 45.075 - mean_q: 57.870 Interval 605 (302000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2665 1 episodes - episode_reward: 143.134 [143.134, 143.134] - loss: 9.771 - 
mae: 45.166 - mean_q: 58.120 Interval 606 (302500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6665 3 episodes - episode_reward: -133.188 [-184.649, -70.790] - loss: 15.302 - mae: 45.731 - mean_q: 58.820 Interval 607 (303000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0249 Interval 608 (303500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3068 1 episodes - episode_reward: 181.619 [181.619, 181.619] - loss: 10.748 - mae: 45.864 - mean_q: 59.165 Interval 609 (304000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3264 1 episodes - episode_reward: 158.765 [158.765, 158.765] - loss: 10.426 - mae: 45.812 - mean_q: 59.161 Interval 610 (304500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3093 1 episodes - episode_reward: 150.268 [150.268, 150.268] - loss: 16.014 - mae: 46.062 - mean_q: 59.420 Interval 611 (305000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3477 1 episodes - episode_reward: 232.209 [232.209, 232.209] - loss: 9.350 - mae: 45.740 - mean_q: 59.167 Interval 612 (305500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2260 1 episodes - episode_reward: -94.385 [-94.385, -94.385] - loss: 11.486 - mae: 45.808 - mean_q: 58.965 Interval 613 (306000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3075 3 episodes - episode_reward: -13.278 [-198.970, 307.395] - loss: 12.127 - mae: 45.623 - mean_q: 58.886 Interval 614 (306500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1488 2 episodes - episode_reward: -70.228 [-111.406, -29.049] - loss: 10.762 - mae: 45.945 - mean_q: 59.319 Interval 615 (307000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2698 1 episodes - episode_reward: 289.634 [289.634, 
289.634] - loss: 13.021 - mae: 45.444 - mean_q: 58.632 Interval 616 (307500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6002 3 episodes - episode_reward: -98.852 [-129.704, -40.653] - loss: 13.326 - mae: 45.483 - mean_q: 58.431 Interval 617 (308000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0011 2 episodes - episode_reward: -304.059 [-425.475, -182.642] - loss: 13.078 - mae: 45.258 - mean_q: 58.023 Interval 618 (308500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1196 Interval 619 (309000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0171 Interval 620 (309500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3183 2 episodes - episode_reward: 17.119 [-225.882, 260.121] - loss: 13.061 - mae: 45.275 - mean_q: 57.791 Interval 621 (310000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.1085 Interval 622 (310500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2588 2 episodes - episode_reward: 59.021 [-93.219, 211.260] - loss: 12.552 - mae: 45.381 - mean_q: 58.355 Interval 623 (311000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1781 Interval 624 (311500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1654 1 episodes - episode_reward: 228.224 [228.224, 228.224] - loss: 12.219 - mae: 44.713 - mean_q: 57.512 Interval 625 (312000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0288 Interval 626 (312500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0075 Interval 627 (313000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.3546 1 episodes - episode_reward: 35.146 [35.146, 35.146] - loss: 14.863 - mae: 45.029 - mean_q: 57.575 
Interval 628 (313500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1707 2 episodes - episode_reward: 89.145 [-107.507, 285.796] - loss: 14.637 - mae: 44.384 - mean_q: 56.701 Interval 629 (314000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7691 2 episodes - episode_reward: -272.265 [-413.510, -131.020] - loss: 11.971 - mae: 44.049 - mean_q: 56.969 Interval 630 (314500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2405 1 episodes - episode_reward: -137.181 [-137.181, -137.181] - loss: 13.796 - mae: 44.808 - mean_q: 57.512 Interval 631 (315000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3524 1 episodes - episode_reward: 141.708 [141.708, 141.708] - loss: 15.949 - mae: 44.272 - mean_q: 56.546 Interval 632 (315500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2582 1 episodes - episode_reward: 167.124 [167.124, 167.124] - loss: 13.112 - mae: 44.464 - mean_q: 56.588 Interval 633 (316000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3969 1 episodes - episode_reward: -155.052 [-155.052, -155.052] - loss: 12.544 - mae: 44.686 - mean_q: 56.747 Interval 634 (316500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3981 1 episodes - episode_reward: 132.079 [132.079, 132.079] - loss: 12.452 - mae: 44.103 - mean_q: 55.766 Interval 635 (317000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9405 4 episodes - episode_reward: -109.009 [-280.898, 133.440] - loss: 12.402 - mae: 44.451 - mean_q: 56.712 Interval 636 (317500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3395 2 episodes - episode_reward: 79.954 [-107.254, 267.163] - loss: 13.858 - mae: 44.228 - mean_q: 56.058 Interval 637 (318000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: 0.0645 1 episodes - episode_reward: -19.087 [-19.087, -19.087] - loss: 17.773 - mae: 44.143 - mean_q: 56.151 Interval 638 (318500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1475 1 episodes - episode_reward: 176.374 [176.374, 176.374] - loss: 13.294 - mae: 43.633 - mean_q: 55.404 Interval 639 (319000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6298 3 episodes - episode_reward: -121.998 [-159.473, -67.520] - loss: 17.130 - mae: 43.416 - mean_q: 55.204 Interval 640 (319500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1144 1 episodes - episode_reward: 110.992 [110.992, 110.992] - loss: 12.949 - mae: 43.107 - mean_q: 54.877 Interval 641 (320000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3983 1 episodes - episode_reward: -303.261 [-303.261, -303.261] - loss: 15.523 - mae: 42.903 - mean_q: 54.158 Interval 642 (320500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0021 Interval 643 (321000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1827 2 episodes - episode_reward: -15.321 [-205.259, 174.618] - loss: 11.979 - mae: 42.603 - mean_q: 53.716 Interval 644 (321500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2178 1 episodes - episode_reward: -37.223 [-37.223, -37.223] - loss: 11.377 - mae: 43.249 - mean_q: 54.715 Interval 645 (322000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0764 Interval 646 (322500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2356 1 episodes - episode_reward: 196.539 [196.539, 196.539] - loss: 13.562 - mae: 43.468 - mean_q: 55.228 Interval 647 (323000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3697 4 
episodes - episode_reward: -29.141 [-107.840, 149.542] - loss: 21.308 - mae: 43.318 - mean_q: 54.815 Interval 648 (323500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.6006 1 episodes - episode_reward: 256.267 [256.267, 256.267] - loss: 21.425 - mae: 43.818 - mean_q: 55.738 Interval 649 (324000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4251 2 episodes - episode_reward: -163.144 [-273.519, -52.769] - loss: 27.985 - mae: 44.364 - mean_q: 56.048 Interval 650 (324500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0716 Interval 651 (325000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1462 2 episodes - episode_reward: 63.500 [-124.194, 251.195] - loss: 16.279 - mae: 44.946 - mean_q: 56.976 Interval 652 (325500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0420 Interval 653 (326000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0768 2 episodes - episode_reward: 7.256 [-168.452, 182.964] - loss: 17.763 - mae: 45.618 - mean_q: 57.798 Interval 654 (326500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3590 1 episodes - episode_reward: 195.720 [195.720, 195.720] - loss: 21.704 - mae: 45.571 - mean_q: 57.756 Interval 655 (327000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2166 1 episodes - episode_reward: -51.174 [-51.174, -51.174] - loss: 19.688 - mae: 45.782 - mean_q: 58.301 Interval 656 (327500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4729 1 episodes - episode_reward: 192.340 [192.340, 192.340] - loss: 15.186 - mae: 45.776 - mean_q: 58.378 Interval 657 (328000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6310 3 episodes - episode_reward: -124.938 [-175.375, -98.930] - loss: 15.963 - 
mae: 46.133 - mean_q: 58.512 Interval 658 (328500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0924 Interval 659 (329000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1614 3 episodes - episode_reward: -160.288 [-364.853, 204.804] - loss: 13.644 - mae: 46.387 - mean_q: 58.521 Interval 660 (329500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0839 3 episodes - episode_reward: -144.110 [-273.565, -58.766] - loss: 16.572 - mae: 46.465 - mean_q: 58.381 Interval 661 (330000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5687 3 episodes - episode_reward: -141.928 [-322.581, -5.077] - loss: 21.033 - mae: 46.232 - mean_q: 58.056 Interval 662 (330500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1535 2 episodes - episode_reward: -61.503 [-65.730, -57.275] - loss: 18.327 - mae: 46.193 - mean_q: 58.056 Interval 663 (331000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6689 3 episodes - episode_reward: -124.457 [-259.489, -13.880] - loss: 16.821 - mae: 46.295 - mean_q: 57.711 Interval 664 (331500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1793 2 episodes - episode_reward: -27.641 [-79.796, 24.514] - loss: 19.007 - mae: 46.482 - mean_q: 58.459 Interval 665 (332000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8909 5 episodes - episode_reward: -73.156 [-238.975, 186.579] - loss: 18.246 - mae: 46.090 - mean_q: 57.704 Interval 666 (332500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6539 1 episodes - episode_reward: 277.216 [277.216, 277.216] - loss: 12.428 - mae: 46.195 - mean_q: 57.155 Interval 667 (333000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2926 1 episodes - episode_reward: -59.732 
[-59.732, -59.732] - loss: 18.375 - mae: 45.673 - mean_q: 56.005 Interval 668 (333500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8656 3 episodes - episode_reward: -189.138 [-378.831, -80.009] - loss: 20.155 - mae: 46.059 - mean_q: 55.997 Interval 669 (334000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0786 3 episodes - episode_reward: -9.136 [-139.005, 215.877] - loss: 14.859 - mae: 46.075 - mean_q: 56.035 Interval 670 (334500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0185 2 episodes - episode_reward: 18.236 [15.534, 20.938] - loss: 16.320 - mae: 45.929 - mean_q: 56.001 Interval 671 (335000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4284 3 episodes - episode_reward: -66.225 [-101.667, -28.042] - loss: 17.065 - mae: 46.092 - mean_q: 56.095 Interval 672 (335500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0198 1 episodes - episode_reward: 19.178 [19.178, 19.178] - loss: 16.924 - mae: 46.031 - mean_q: 56.365 Interval 673 (336000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1761 Interval 674 (336500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0605 2 episodes - episode_reward: 43.613 [-117.353, 204.580] - loss: 15.166 - mae: 45.222 - mean_q: 55.051 Interval 675 (337000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2661 2 episodes - episode_reward: -13.323 [-95.713, 69.068] - loss: 16.416 - mae: 45.344 - mean_q: 55.246 Interval 676 (337500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4191 4 episodes - episode_reward: -202.610 [-403.138, -29.471] - loss: 19.982 - mae: 45.916 - mean_q: 55.885 Interval 677 (338000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0315 1 episodes - 
episode_reward: -1.339 [-1.339, -1.339] - loss: 19.898 - mae: 45.718 - mean_q: 55.519 Interval 678 (338500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1304 2 episodes - episode_reward: -65.921 [-74.767, -57.074] - loss: 16.932 - mae: 45.948 - mean_q: 56.016 Interval 679 (339000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1876: 0s - Interval 680 (339500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4199 2 episodes - episode_reward: 1.324 [-18.054, 20.703] - loss: 16.727 - mae: 45.852 - mean_q: 55.651 Interval 681 (340000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0925 4 episodes - episode_reward: -162.946 [-251.525, -107.425] - loss: 21.179 - mae: 44.872 - mean_q: 54.655 Interval 682 (340500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7445 3 episodes - episode_reward: -303.394 [-349.920, -263.320] - loss: 19.351 - mae: 44.159 - mean_q: 52.709 Interval 683 (341000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0792 Interval 684 (341500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0345 Interval 685 (342000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0413 Interval 686 (342500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0349 1 episodes - episode_reward: 101.388 [101.388, 101.388] - loss: 14.051 - mae: 42.203 - mean_q: 51.039 Interval 687 (343000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4945 3 episodes - episode_reward: -87.929 [-138.589, -55.837] - loss: 14.797 - mae: 41.729 - mean_q: 50.573 Interval 688 (343500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0880 Interval 689 (344000 steps performed) 500/500 
[==============================] - 4s 9ms/step - reward: 0.1092 1 episodes - episode_reward: 131.663 [131.663, 131.663] - loss: 15.384 - mae: 41.291 - mean_q: 49.959 Interval 690 (344500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6799 1 episodes - episode_reward: -333.710 [-333.710, -333.710] - loss: 15.197 - mae: 40.968 - mean_q: 49.962 Interval 691 (345000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5655 2 episodes - episode_reward: -176.179 [-222.595, -129.762] - loss: 17.502 - mae: 40.709 - mean_q: 49.409 Interval 692 (345500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2402 Interval 693 (346000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0615 1 episodes - episode_reward: 231.433 [231.433, 231.433] - loss: 15.912 - mae: 40.744 - mean_q: 49.843 Interval 694 (346500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7743 3 episodes - episode_reward: -161.203 [-250.322, -99.842] - loss: 14.015 - mae: 40.778 - mean_q: 49.454 Interval 695 (347000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4995 1 episodes - episode_reward: -364.710 [-364.710, -364.710] - loss: 19.212 - mae: 40.555 - mean_q: 48.648 Interval 696 (347500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2596 1 episodes - episode_reward: 304.383 [304.383, 304.383] - loss: 16.721 - mae: 40.035 - mean_q: 48.411 Interval 697 (348000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8827 3 episodes - episode_reward: -156.860 [-167.523, -151.099] - loss: 15.431 - mae: 40.274 - mean_q: 47.794 Interval 698 (348500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9343 2 episodes - episode_reward: -233.101 [-321.244, -144.958] - loss: 14.318 - mae: 40.293 - mean_q: 47.665 
Interval 699 (349000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1700 Interval 700 (349500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1586 1 episodes - episode_reward: -54.486 [-54.486, -54.486] - loss: 15.497 - mae: 39.999 - mean_q: 47.127 Interval 701 (350000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1286 Interval 702 (350500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1378 2 episodes - episode_reward: 43.876 [-109.477, 197.229] - loss: 14.858 - mae: 40.482 - mean_q: 47.277 Interval 703 (351000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1052 Interval 704 (351500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0815 Interval 705 (352000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3704 1 episodes - episode_reward: 208.698 [208.698, 208.698] - loss: 12.895 - mae: 40.835 - mean_q: 47.283 Interval 706 (352500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0594 Interval 707 (353000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4081 1 episodes - episode_reward: -253.740 [-253.740, -253.740] - loss: 14.196 - mae: 40.848 - mean_q: 47.676 Interval 708 (353500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0314 Interval 709 (354000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0562 2 episodes - episode_reward: 65.547 [-102.008, 233.102] - loss: 14.406 - mae: 40.319 - mean_q: 46.038 Interval 710 (354500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1793 1 episodes - episode_reward: -94.163 [-94.163, -94.163] - loss: 13.927 - mae: 40.279 - mean_q: 46.416 Interval 711 (355000 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -0.5300 1 episodes - episode_reward: -95.231 [-95.231, -95.231] - loss: 13.528 - mae: 40.594 - mean_q: 46.306 Interval 712 (355500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2418 1 episodes - episode_reward: -168.576 [-168.576, -168.576] - loss: 15.449 - mae: 40.501 - mean_q: 45.962 Interval 713 (356000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3063 1 episodes - episode_reward: 209.951 [209.951, 209.951] - loss: 16.053 - mae: 40.181 - mean_q: 45.648 Interval 714 (356500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0349 Interval 715 (357000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6129 4 episodes - episode_reward: -198.410 [-350.143, -120.193] - loss: 15.219 - mae: 40.269 - mean_q: 46.316 Interval 716 (357500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0805 Interval 717 (358000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2335 Interval 718 (358500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.0337 Interval 719 (359000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4763 5 episodes - episode_reward: -176.677 [-415.163, -100.000] - loss: 16.089 - mae: 40.918 - mean_q: 47.137 Interval 720 (359500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4022 1 episodes - episode_reward: -191.154 [-191.154, -191.154] - loss: 14.539 - mae: 41.322 - mean_q: 48.175 Interval 721 (360000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6171 1 episodes - episode_reward: -233.496 [-233.496, -233.496] - loss: 14.922 - mae: 41.108 - mean_q: 47.534 Interval 722 (360500 steps performed) 500/500 [==============================] - 4s 8ms/step - 
reward: 0.0726 Interval 723 (361000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1962 Interval 724 (361500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2223 Interval 725 (362000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1629 Interval 726 (362500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0711 2 episodes - episode_reward: -178.384 [-256.768, -100.000] - loss: 11.823 - mae: 40.181 - mean_q: 46.418 Interval 727 (363000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2626 2 episodes - episode_reward: 69.830 [-110.434, 250.095] - loss: 18.961 - mae: 39.556 - mean_q: 45.601 Interval 728 (363500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1114 Interval 729 (364000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1144 2 episodes - episode_reward: 20.428 [-116.802, 157.659] - loss: 14.554 - mae: 39.223 - mean_q: 45.510 Interval 730 (364500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0623 Interval 731 (365000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.1278 1 episodes - episode_reward: 116.172 [116.172, 116.172] - loss: 14.178 - mae: 38.489 - mean_q: 44.422 Interval 732 (365500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2306 2 episodes - episode_reward: -122.964 [-149.805, -96.124] - loss: 13.306 - mae: 38.495 - mean_q: 44.499 Interval 733 (366000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1900 1 episodes - episode_reward: 238.301 [238.301, 238.301] - loss: 15.239 - mae: 38.444 - mean_q: 44.799 Interval 734 (366500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.2140 1 episodes - episode_reward: 89.875 
[89.875, 89.875] - loss: 15.935 - mae: 38.192 - mean_q: 44.493 Interval 735 (367000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2240 1 episodes - episode_reward: -115.970 [-115.970, -115.970] - loss: 14.580 - mae: 37.813 - mean_q: 44.140 Interval 736 (367500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6430 2 episodes - episode_reward: -210.615 [-273.869, -147.361] - loss: 15.358 - mae: 38.035 - mean_q: 44.319 Interval 737 (368000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6565 2 episodes - episode_reward: -154.628 [-195.276, -113.980] - loss: 16.278 - mae: 37.753 - mean_q: 44.453 Interval 738 (368500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3162 1 episodes - episode_reward: 232.634 [232.634, 232.634] - loss: 17.380 - mae: 37.698 - mean_q: 43.882 Interval 739 (369000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3057 1 episodes - episode_reward: -178.616 [-178.616, -178.616] - loss: 16.593 - mae: 37.938 - mean_q: 44.725 Interval 740 (369500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0774 Interval 741 (370000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0628 Interval 742 (370500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0995 Interval 743 (371000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1461 Interval 744 (371500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0289 Interval 745 (372000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2738 1 episodes - episode_reward: 47.562 [47.562, 47.562] - loss: 14.068 - mae: 38.283 - mean_q: 45.371 Interval 746 (372500 steps performed) 500/500 [==============================] - 4s 9ms/step - 
reward: 0.0783 Interval 747 (373000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0568 2 episodes - episode_reward: 23.444 [-129.998, 176.886] - loss: 16.980 - mae: 38.291 - mean_q: 45.783 Interval 748 (373500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7581 4 episodes - episode_reward: -113.668 [-163.656, -91.194] - loss: 13.631 - mae: 38.789 - mean_q: 46.214 Interval 749 (374000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2837 Interval 750 (374500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4290 3 episodes - episode_reward: -126.340 [-159.705, -61.253] - loss: 14.036 - mae: 37.985 - mean_q: 45.481 Interval 751 (375000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1549 2 episodes - episode_reward: 83.530 [-111.440, 278.501] - loss: 17.003 - mae: 38.506 - mean_q: 45.929 Interval 752 (375500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1237 3 episodes - episode_reward: -146.847 [-304.229, -28.648] - loss: 15.733 - mae: 38.401 - mean_q: 45.683 Interval 753 (376000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3634 1 episodes - episode_reward: -305.550 [-305.550, -305.550] - loss: 17.069 - mae: 38.779 - mean_q: 45.936 Interval 754 (376500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0013 1 episodes - episode_reward: -100.310 [-100.310, -100.310] - loss: 18.028 - mae: 39.064 - mean_q: 45.925 Interval 755 (377000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.2418 1 episodes - episode_reward: 214.841 [214.841, 214.841] - loss: 16.129 - mae: 38.914 - mean_q: 45.881 Interval 756 (377500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0719 Interval 757 (378000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: 0.1389 2 episodes - episode_reward: 14.258 [-126.170, 154.686] - loss: 17.810 - mae: 38.945 - mean_q: 46.715 Interval 758 (378500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2597 1 episodes - episode_reward: 195.513 [195.513, 195.513] - loss: 13.786 - mae: 38.738 - mean_q: 46.020 Interval 759 (379000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6487 1 episodes - episode_reward: -342.994 [-342.994, -342.994] - loss: 14.685 - mae: 39.232 - mean_q: 46.957 Interval 760 (379500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1317 Interval 761 (380000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8598 3 episodes - episode_reward: -131.162 [-201.631, -91.854] - loss: 14.184 - mae: 39.054 - mean_q: 47.030 Interval 762 (380500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3804 4 episodes - episode_reward: -170.779 [-254.837, -129.121] - loss: 15.226 - mae: 38.535 - mean_q: 45.968 Interval 763 (381000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3640 2 episodes - episode_reward: -150.959 [-171.271, -130.647] - loss: 13.315 - mae: 39.105 - mean_q: 46.926 Interval 764 (381500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0735 Interval 765 (382000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0609 Interval 766 (382500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2131 1 episodes - episode_reward: 140.727 [140.727, 140.727] - loss: 15.286 - mae: 39.036 - mean_q: 47.697 Interval 767 (383000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2765 3 episodes - episode_reward: -31.066 [-109.076, 115.879] - loss: 14.892 - mae: 39.409 - mean_q: 48.158 
Interval 768 (383500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4306 1 episodes - episode_reward: -140.319 [-140.319, -140.319] - loss: 15.749 - mae: 39.414 - mean_q: 48.346 Interval 769 (384000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0732 1 episodes - episode_reward: -177.398 [-177.398, -177.398] - loss: 17.832 - mae: 39.224 - mean_q: 47.587 Interval 770 (384500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0591 Interval 771 (385000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2614 2 episodes - episode_reward: 81.049 [49.974, 112.124] - loss: 15.356 - mae: 39.385 - mean_q: 48.143 Interval 772 (385500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3013 1 episodes - episode_reward: 162.727 [162.727, 162.727] - loss: 15.874 - mae: 39.827 - mean_q: 47.971 Interval 773 (386000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5929 1 episodes - episode_reward: -318.470 [-318.470, -318.470] - loss: 16.778 - mae: 39.411 - mean_q: 47.561 Interval 774 (386500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1037 Interval 775 (387000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1348 1 episodes - episode_reward: 136.215 [136.215, 136.215] - loss: 12.822 - mae: 39.225 - mean_q: 47.827 Interval 776 (387500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5849 4 episodes - episode_reward: -204.827 [-410.466, -39.648] - loss: 13.910 - mae: 39.228 - mean_q: 47.566 Interval 777 (388000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2418 1 episodes - episode_reward: -188.727 [-188.727, -188.727] - loss: 15.079 - mae: 39.160 - mean_q: 47.411 Interval 778 (388500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: 0.1426 2 episodes - episode_reward: 39.182 [-138.523, 216.886] - loss: 17.593 - mae: 38.968 - mean_q: 47.089 Interval 779 (389000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4216 2 episodes - episode_reward: -87.192 [-170.562, -3.823] - loss: 14.049 - mae: 39.078 - mean_q: 47.186 Interval 780 (389500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6797 1 episodes - episode_reward: 177.350 [177.350, 177.350] - loss: 13.349 - mae: 39.203 - mean_q: 47.722 Interval 781 (390000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4155 2 episodes - episode_reward: 31.806 [-232.875, 296.488] - loss: 13.449 - mae: 39.528 - mean_q: 48.491 Interval 782 (390500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4535 1 episodes - episode_reward: 170.601 [170.601, 170.601] - loss: 14.577 - mae: 39.583 - mean_q: 48.962 Interval 783 (391000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2034 2 episodes - episode_reward: 37.791 [-141.663, 217.244] - loss: 13.566 - mae: 39.549 - mean_q: 48.345 Interval 784 (391500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1646 Interval 785 (392000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6025 2 episodes - episode_reward: 192.456 [187.784, 197.129] - loss: 13.787 - mae: 40.236 - mean_q: 49.103 Interval 786 (392500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1443 2 episodes - episode_reward: 32.828 [-118.426, 184.083] - loss: 14.712 - mae: 40.533 - mean_q: 49.669 Interval 787 (393000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4977 1 episodes - episode_reward: 272.168 [272.168, 272.168] - loss: 12.048 - mae: 40.506 - mean_q: 50.226 Interval 788 (393500 
steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0457 1 episodes - episode_reward: -82.279 [-82.279, -82.279] - loss: 10.999 - mae: 40.883 - mean_q: 50.449 Interval 789 (394000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0250 2 episodes - episode_reward: 36.013 [-157.714, 229.739] - loss: 17.845 - mae: 40.878 - mean_q: 50.262 Interval 790 (394500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2224 Interval 791 (395000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3503 1 episodes - episode_reward: 213.892 [213.892, 213.892] - loss: 11.432 - mae: 40.525 - mean_q: 50.092 Interval 792 (395500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.5323 1 episodes - episode_reward: 190.383 [190.383, 190.383] - loss: 12.354 - mae: 40.413 - mean_q: 49.799 Interval 793 (396000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0871 6 episodes - episode_reward: -66.918 [-174.071, 262.617] - loss: 14.230 - mae: 40.399 - mean_q: 49.494 Interval 794 (396500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.5437 1 episodes - episode_reward: 184.887 [184.887, 184.887] - loss: 12.282 - mae: 41.041 - mean_q: 50.622 Interval 795 (397000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2653 2 episodes - episode_reward: 125.643 [37.782, 213.504] - loss: 13.533 - mae: 41.082 - mean_q: 51.400 Interval 796 (397500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.1101 Interval 797 (398000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3732 1 episodes - episode_reward: 150.658 [150.658, 150.658] - loss: 15.442 - mae: 41.107 - mean_q: 51.422 Interval 798 (398500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 
0.2366 1 episodes - episode_reward: 179.807 [179.807, 179.807] - loss: 13.448 - mae: 40.997 - mean_q: 51.028 Interval 799 (399000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4312 1 episodes - episode_reward: 211.478 [211.478, 211.478] - loss: 13.433 - mae: 40.764 - mean_q: 51.162 Interval 800 (399500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0158 Interval 801 (400000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1273 2 episodes - episode_reward: -2.771 [-114.970, 109.428] - loss: 14.055 - mae: 40.178 - mean_q: 50.370 Interval 802 (400500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1794 1 episodes - episode_reward: 149.747 [149.747, 149.747] - loss: 13.227 - mae: 40.613 - mean_q: 50.739 Interval 803 (401000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1466 1 episodes - episode_reward: -46.703 [-46.703, -46.703] - loss: 11.051 - mae: 40.858 - mean_q: 51.119 Interval 804 (401500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1601 2 episodes - episode_reward: 27.881 [-180.162, 235.923] - loss: 12.444 - mae: 41.242 - mean_q: 51.798 Interval 805 (402000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2074 2 episodes - episode_reward: -64.259 [-138.758, 10.239] - loss: 13.316 - mae: 41.092 - mean_q: 51.488 Interval 806 (402500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3328 1 episodes - episode_reward: 227.436 [227.436, 227.436] - loss: 15.239 - mae: 41.187 - mean_q: 51.687 Interval 807 (403000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1147 2 episodes - episode_reward: -82.657 [-141.212, -24.101] - loss: 15.383 - mae: 41.210 - mean_q: 51.536 Interval 808 (403500 steps performed) 500/500 [==============================] 
- 4s 7ms/step - reward: -0.1305 2 episodes - episode_reward: -43.618 [-101.266, 14.030] - loss: 14.873 - mae: 41.609 - mean_q: 51.968 Interval 809 (404000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0925 Interval 810 (404500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1620 1 episodes - episode_reward: 240.654 [240.654, 240.654] - loss: 18.766 - mae: 41.254 - mean_q: 51.124 Interval 811 (405000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9167 3 episodes - episode_reward: -134.130 [-243.764, -43.032] - loss: 13.250 - mae: 41.058 - mean_q: 51.072 Interval 812 (405500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6955 3 episodes - episode_reward: -134.050 [-177.832, -56.706] - loss: 16.055 - mae: 40.588 - mean_q: 50.116 Interval 813 (406000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2604 3 episodes - episode_reward: -51.654 [-93.782, -19.680] - loss: 14.428 - mae: 40.327 - mean_q: 50.130 Interval 814 (406500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3095 2 episodes - episode_reward: -76.576 [-116.157, -36.995] - loss: 15.454 - mae: 40.372 - mean_q: 49.936 Interval 815 (407000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2657 3 episodes - episode_reward: -41.194 [-114.848, 47.246] - loss: 15.173 - mae: 40.293 - mean_q: 50.137 Interval 816 (407500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7540 1 episodes - episode_reward: -378.467 [-378.467, -378.467] - loss: 12.138 - mae: 40.219 - mean_q: 49.510 Interval 817 (408000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8210 2 episodes - episode_reward: -164.968 [-197.456, -132.480] - loss: 16.357 - mae: 40.168 - mean_q: 49.250 Interval 818 (408500 steps performed) 
500/500 [==============================] - 3s 7ms/step - reward: -0.4699 3 episodes - episode_reward: -82.595 [-374.998, 236.264] - loss: 20.570 - mae: 39.798 - mean_q: 48.393 Interval 819 (409000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2882 2 episodes - episode_reward: 48.530 [-100.000, 197.060] - loss: 29.410 - mae: 39.798 - mean_q: 48.138 Interval 820 (409500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2622 2 episodes - episode_reward: 39.805 [-103.240, 182.851] - loss: 14.380 - mae: 39.606 - mean_q: 49.156 Interval 821 (410000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2541 Interval 822 (410500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5704 1 episodes - episode_reward: 256.581 [256.581, 256.581] - loss: 17.019 - mae: 39.231 - mean_q: 49.233 Interval 823 (411000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.7206 2 episodes - episode_reward: 274.400 [263.710, 285.090] - loss: 18.385 - mae: 39.168 - mean_q: 48.988 Interval 824 (411500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1842 1 episodes - episode_reward: -175.685 [-175.685, -175.685] - loss: 15.373 - mae: 39.194 - mean_q: 48.868 Interval 825 (412000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1499 2 episodes - episode_reward: 43.910 [-89.689, 177.508] - loss: 14.593 - mae: 39.499 - mean_q: 48.842 Interval 826 (412500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0927 Interval 827 (413000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3532 2 episodes - episode_reward: 0.165 [-213.364, 213.695] - loss: 13.934 - mae: 39.195 - mean_q: 48.540 Interval 828 (413500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0528 1 
episodes - episode_reward: -166.256 [-166.256, -166.256] - loss: 11.661 - mae: 39.606 - mean_q: 49.048 Interval 829 (414000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0538 1 episodes - episode_reward: 63.129 [63.129, 63.129] - loss: 17.597 - mae: 39.787 - mean_q: 49.561 Interval 830 (414500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4852 2 episodes - episode_reward: 165.901 [129.609, 202.194] - loss: 17.251 - mae: 39.376 - mean_q: 48.624 Interval 831 (415000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5309 2 episodes - episode_reward: -139.702 [-248.278, -31.127] - loss: 17.861 - mae: 39.350 - mean_q: 48.308 Interval 832 (415500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3394 2 episodes - episode_reward: -103.820 [-113.932, -93.709] - loss: 14.898 - mae: 39.491 - mean_q: 48.285 Interval 833 (416000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4029 3 episodes - episode_reward: 12.916 [-137.569, 207.882] - loss: 15.980 - mae: 39.336 - mean_q: 48.642 Interval 834 (416500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6915 1 episodes - episode_reward: -1070.868 [-1070.868, -1070.868] - loss: 14.404 - mae: 39.224 - mean_q: 47.988 Interval 835 (417000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5378 2 episodes - episode_reward: -131.676 [-151.773, -111.579] - loss: 17.409 - mae: 39.136 - mean_q: 47.977 Interval 836 (417500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0182 1 episodes - episode_reward: -144.023 [-144.023, -144.023] - loss: 15.792 - mae: 39.308 - mean_q: 48.592 Interval 837 (418000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2239 2 episodes - episode_reward: -4.122 [-103.872, 95.628] - loss: 14.159 
- mae: 39.436 - mean_q: 48.351 Interval 838 (418500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2882 1 episodes - episode_reward: 265.892 [265.892, 265.892] - loss: 19.194 - mae: 39.356 - mean_q: 47.957 Interval 839 (419000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3007 1 episodes - episode_reward: -261.441 [-261.441, -261.441] - loss: 15.927 - mae: 39.248 - mean_q: 47.752 Interval 840 (419500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0495 Interval 841 (420000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3424 4 episodes - episode_reward: -159.454 [-582.678, 130.143] - loss: 23.496 - mae: 39.238 - mean_q: 47.937 Interval 842 (420500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5660 2 episodes - episode_reward: -144.774 [-225.100, -64.448] - loss: 19.495 - mae: 39.487 - mean_q: 48.842 Interval 843 (421000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0484 2 episodes - episode_reward: -10.619 [-230.373, 209.134] - loss: 14.014 - mae: 39.428 - mean_q: 48.285 Interval 844 (421500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3291 2 episodes - episode_reward: 88.202 [-86.749, 263.152] - loss: 20.404 - mae: 39.808 - mean_q: 48.823 Interval 845 (422000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0619 Interval 846 (422500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5750 2 episodes - episode_reward: -239.926 [-288.722, -191.129] - loss: 19.143 - mae: 40.238 - mean_q: 49.309 Interval 847 (423000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0653 Interval 848 (423500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3197 1 episodes - 
episode_reward: 222.750 [222.750, 222.750] - loss: 21.981 - mae: 40.577 - mean_q: 49.837 Interval 849 (424000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3076 4 episodes - episode_reward: -135.234 [-349.690, 153.555] - loss: 20.255 - mae: 40.447 - mean_q: 49.626 Interval 850 (424500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8672 1 episodes - episode_reward: -455.157 [-455.157, -455.157] - loss: 17.677 - mae: 40.956 - mean_q: 49.829 Interval 851 (425000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1411 Interval 852 (425500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8085 3 episodes - episode_reward: -122.731 [-254.912, 132.772] - loss: 21.587 - mae: 42.063 - mean_q: 51.308 Interval 853 (426000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2231 1 episodes - episode_reward: 34.976 [34.976, 34.976] - loss: 16.626 - mae: 42.458 - mean_q: 51.886 Interval 854 (426500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2694 1 episodes - episode_reward: 192.820 [192.820, 192.820] - loss: 18.383 - mae: 42.556 - mean_q: 51.306 Interval 855 (427000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5012 2 episodes - episode_reward: -99.164 [-137.045, -61.282] - loss: 19.192 - mae: 42.226 - mean_q: 51.661 Interval 856 (427500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1453 1 episodes - episode_reward: -129.080 [-129.080, -129.080] - loss: 17.442 - mae: 42.507 - mean_q: 52.082 Interval 857 (428000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0615 1 episodes - episode_reward: -20.688 [-20.688, -20.688] - loss: 18.786 - mae: 43.325 - mean_q: 53.049 Interval 858 (428500 steps performed) 500/500 [==============================] - 4s 
7ms/step - reward: 0.0461 2 episodes - episode_reward: -54.386 [-74.153, -34.619] - loss: 17.226 - mae: 43.283 - mean_q: 53.294 Interval 859 (429000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3619 1 episodes - episode_reward: 296.458 [296.458, 296.458] - loss: 19.997 - mae: 43.978 - mean_q: 53.886 Interval 860 (429500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4218 1 episodes - episode_reward: 241.596 [241.596, 241.596] - loss: 22.248 - mae: 44.364 - mean_q: 54.039 Interval 861 (430000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3949 3 episodes - episode_reward: -206.552 [-297.954, -114.788] - loss: 19.985 - mae: 44.976 - mean_q: 54.592 Interval 862 (430500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0526 1 episodes - episode_reward: -198.567 [-198.567, -198.567] - loss: 18.541 - mae: 44.774 - mean_q: 54.526 Interval 863 (431000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.5132 1 episodes - episode_reward: 188.202 [188.202, 188.202] - loss: 17.080 - mae: 45.450 - mean_q: 55.560 Interval 864 (431500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6058 2 episodes - episode_reward: 236.612 [213.819, 259.406] - loss: 17.885 - mae: 45.477 - mean_q: 55.564 Interval 865 (432000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3887 Interval 866 (432500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5961 4 episodes - episode_reward: -27.378 [-167.796, 287.072] - loss: 16.390 - mae: 45.235 - mean_q: 55.723 Interval 867 (433000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3103 1 episodes - episode_reward: 170.404 [170.404, 170.404] - loss: 18.029 - mae: 45.839 - mean_q: 56.376 Interval 868 (433500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -0.5642 2 episodes - episode_reward: -139.288 [-183.906, -94.669] - loss: 17.444 - mae: 46.403 - mean_q: 56.959 Interval 869 (434000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0085 1 episodes - episode_reward: -13.947 [-13.947, -13.947] - loss: 20.729 - mae: 47.015 - mean_q: 57.142 Interval 870 (434500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0957 1 episodes - episode_reward: -123.674 [-123.674, -123.674] - loss: 15.544 - mae: 47.445 - mean_q: 57.887 Interval 871 (435000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0780 1 episodes - episode_reward: 184.918 [184.918, 184.918] - loss: 18.575 - mae: 47.185 - mean_q: 57.693 Interval 872 (435500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0571 1 episodes - episode_reward: -189.679 [-189.679, -189.679] - loss: 16.833 - mae: 47.191 - mean_q: 58.321 Interval 873 (436000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2750 2 episodes - episode_reward: 78.985 [-100.000, 257.969] - loss: 16.252 - mae: 48.186 - mean_q: 59.560 Interval 874 (436500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5093 1 episodes - episode_reward: 292.152 [292.152, 292.152] - loss: 24.169 - mae: 48.260 - mean_q: 59.474 Interval 875 (437000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0343 2 episodes - episode_reward: 20.531 [-153.901, 194.963] - loss: 20.821 - mae: 48.678 - mean_q: 60.002 Interval 876 (437500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2358 2 episodes - episode_reward: 83.015 [33.076, 132.953] - loss: 17.535 - mae: 48.823 - mean_q: 60.946 Interval 877 (438000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6158 1 episodes - 
episode_reward: 247.805 [247.805, 247.805] - loss: 20.721 - mae: 48.442 - mean_q: 60.468 Interval 878 (438500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2696 1 episodes - episode_reward: -148.971 [-148.971, -148.971] - loss: 19.388 - mae: 49.454 - mean_q: 60.104 Interval 879 (439000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1496 1 episodes - episode_reward: 217.799 [217.799, 217.799] - loss: 21.075 - mae: 49.595 - mean_q: 61.041 Interval 880 (439500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1233 2 episodes - episode_reward: 20.485 [-170.562, 211.532] - loss: 15.361 - mae: 49.254 - mean_q: 60.399 Interval 881 (440000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1789 1 episodes - episode_reward: -170.557 [-170.557, -170.557] - loss: 22.760 - mae: 49.608 - mean_q: 61.744 Interval 882 (440500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3460 1 episodes - episode_reward: -147.614 [-147.614, -147.614] - loss: 19.673 - mae: 49.235 - mean_q: 60.526 Interval 883 (441000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2134 1 episodes - episode_reward: -62.841 [-62.841, -62.841] - loss: 17.865 - mae: 49.212 - mean_q: 60.487 Interval 884 (441500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0247 Interval 885 (442000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1874 1 episodes - episode_reward: 79.711 [79.711, 79.711] - loss: 21.816 - mae: 49.245 - mean_q: 60.682 Interval 886 (442500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3732 1 episodes - episode_reward: 149.403 [149.403, 149.403] - loss: 18.089 - mae: 49.150 - mean_q: 60.533 Interval 887 (443000 steps performed) 500/500 [==============================] - 4s 7ms/step - 
reward: -0.1845 3 episodes - episode_reward: -21.752 [-55.148, 15.994] - loss: 20.965 - mae: 49.146 - mean_q: 60.502 Interval 888 (443500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0392 1 episodes - episode_reward: -47.738 [-47.738, -47.738] - loss: 17.922 - mae: 49.189 - mean_q: 60.508 Interval 889 (444000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2848 2 episodes - episode_reward: -71.932 [-133.683, -10.181] - loss: 15.922 - mae: 49.582 - mean_q: 61.219 Interval 890 (444500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4008 1 episodes - episode_reward: -167.440 [-167.440, -167.440] - loss: 21.223 - mae: 49.728 - mean_q: 61.209 Interval 891 (445000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0391 1 episodes - episode_reward: -37.052 [-37.052, -37.052] - loss: 18.476 - mae: 49.753 - mean_q: 60.529 Interval 892 (445500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1534 1 episodes - episode_reward: 10.850 [10.850, 10.850] - loss: 21.716 - mae: 50.234 - mean_q: 60.943 Interval 893 (446000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1685 1 episodes - episode_reward: 198.009 [198.009, 198.009] - loss: 15.634 - mae: 50.272 - mean_q: 62.341 Interval 894 (446500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2514 Interval 895 (447000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4840 1 episodes - episode_reward: 237.861 [237.861, 237.861] - loss: 18.288 - mae: 50.261 - mean_q: 62.862 Interval 896 (447500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0796 Interval 897 (448000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2906 1 episodes - episode_reward: 216.601 [216.601, 216.601] - loss: 
18.743 - mae: 50.215 - mean_q: 62.480 Interval 898 (448500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1403 Interval 899 (449000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0465 2 episodes - episode_reward: 7.027 [-114.148, 128.203] - loss: 18.161 - mae: 49.804 - mean_q: 62.112 Interval 900 (449500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2103 Interval 901 (450000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0810 Interval 902 (450500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0901 2 episodes - episode_reward: 11.565 [-108.963, 132.094] - loss: 18.156 - mae: 50.313 - mean_q: 62.491 Interval 903 (451000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2040 Interval 904 (451500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3308 1 episodes - episode_reward: 36.430 [36.430, 36.430] - loss: 20.017 - mae: 49.800 - mean_q: 62.461 Interval 905 (452000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0034 Interval 906 (452500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0158 Interval 907 (453000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1433 1 episodes - episode_reward: 168.428 [168.428, 168.428] - loss: 15.956 - mae: 49.034 - mean_q: 62.084 Interval 908 (453500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1517 1 episodes - episode_reward: -79.425 [-79.425, -79.425] - loss: 14.464 - mae: 49.297 - mean_q: 62.535 Interval 909 (454000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0966 Interval 910 (454500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0538 1 episodes - 
episode_reward: 38.031 [38.031, 38.031] - loss: 14.943 - mae: 48.765 - mean_q: 61.154 Interval 911 (455000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1887 1 episodes - episode_reward: -155.613 [-155.613, -155.613] - loss: 14.730 - mae: 48.262 - mean_q: 60.346 Interval 912 (455500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0757 Interval 913 (456000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2104 1 episodes - episode_reward: -212.288 [-212.288, -212.288] - loss: 15.229 - mae: 46.604 - mean_q: 58.957 Interval 914 (456500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2851 1 episodes - episode_reward: -64.083 [-64.083, -64.083] - loss: 19.974 - mae: 46.312 - mean_q: 59.007 Interval 915 (457000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1481 1 episodes - episode_reward: 108.493 [108.493, 108.493] - loss: 14.820 - mae: 45.484 - mean_q: 57.035 Interval 916 (457500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0765 Interval 917 (458000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0769 Interval 918 (458500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: 0.0302 Interval 919 (459000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.1435 Interval 920 (459500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.3527 1 episodes - episode_reward: -79.091 [-79.091, -79.091] - loss: 15.173 - mae: 43.828 - mean_q: 55.823 Interval 921 (460000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3170 1 episodes - episode_reward: -69.245 [-69.245, -69.245] - loss: 18.751 - mae: 43.140 - mean_q: 54.316 Interval 922 (460500 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -1.5405 2 episodes - episode_reward: -421.004 [-626.996, -215.011] - loss: 16.320 - mae: 42.786 - mean_q: 53.613 Interval 923 (461000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4761 1 episodes - episode_reward: -185.327 [-185.327, -185.327] - loss: 16.741 - mae: 43.020 - mean_q: 53.472 Interval 924 (461500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2340 Interval 925 (462000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0936 Interval 926 (462500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1237 1 episodes - episode_reward: 152.599 [152.599, 152.599] - loss: 18.410 - mae: 42.167 - mean_q: 52.278 Interval 927 (463000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1824 Interval 928 (463500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0729 2 episodes - episode_reward: 37.236 [-50.183, 124.654] - loss: 13.264 - mae: 42.128 - mean_q: 52.729 Interval 929 (464000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0353 Interval 930 (464500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2242 3 episodes - episode_reward: -4.707 [-98.462, 145.047] - loss: 14.868 - mae: 41.507 - mean_q: 52.204 Interval 931 (465000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4790 1 episodes - episode_reward: 269.417 [269.417, 269.417] - loss: 11.101 - mae: 41.480 - mean_q: 52.066 Interval 932 (465500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0033 Interval 933 (466000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3114 1 episodes - episode_reward: 239.912 [239.912, 239.912] - loss: 16.217 - mae: 41.657 - mean_q: 51.992 
Interval 934 (466500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1441 Interval 935 (467000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.2505 1 episodes - episode_reward: 108.071 [108.071, 108.071] - loss: 14.413 - mae: 41.062 - mean_q: 52.440 Interval 936 (467500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5328 1 episodes - episode_reward: 257.009 [257.009, 257.009] - loss: 15.793 - mae: 40.982 - mean_q: 52.279 Interval 937 (468000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1258 1 episodes - episode_reward: -151.493 [-151.493, -151.493] - loss: 13.612 - mae: 40.910 - mean_q: 52.160 Interval 938 (468500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0114 Interval 939 (469000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4388 3 episodes - episode_reward: -45.227 [-235.044, 189.408] - loss: 13.053 - mae: 41.364 - mean_q: 52.947 Interval 940 (469500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2738 2 episodes - episode_reward: -127.258 [-142.036, -112.480] - loss: 13.531 - mae: 41.501 - mean_q: 53.078 Interval 941 (470000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3632 1 episodes - episode_reward: 255.882 [255.882, 255.882] - loss: 10.812 - mae: 41.289 - mean_q: 52.971 Interval 942 (470500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3042 1 episodes - episode_reward: 9.609 [9.609, 9.609] - loss: 11.877 - mae: 41.457 - mean_q: 53.233 Interval 943 (471000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1601 1 episodes - episode_reward: 281.929 [281.929, 281.929] - loss: 10.900 - mae: 41.450 - mean_q: 53.150 Interval 944 (471500 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: 0.0715 Interval 945 (472000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1234 Interval 946 (472500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4570 1 episodes - episode_reward: -439.097 [-439.097, -439.097] - loss: 13.539 - mae: 41.072 - mean_q: 52.732 Interval 947 (473000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2549 1 episodes - episode_reward: 258.500 [258.500, 258.500] - loss: 11.661 - mae: 40.703 - mean_q: 51.948 Interval 948 (473500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2006 Interval 949 (474000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0669 Interval 950 (474500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0239 Interval 951 (475000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0372 Interval 952 (475500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0389 Interval 953 (476000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2377 1 episodes - episode_reward: 154.614 [154.614, 154.614] - loss: 11.279 - mae: 38.764 - mean_q: 50.373 Interval 954 (476500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.7302 1 episodes - episode_reward: 167.919 [167.919, 167.919] - loss: 9.607 - mae: 38.814 - mean_q: 50.407 Interval 955 (477000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1225 2 episodes - episode_reward: 134.829 [-49.672, 319.331] - loss: 11.425 - mae: 38.744 - mean_q: 50.110 Interval 956 (477500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1176 1 episodes - episode_reward: -39.075 [-39.075, -39.075] - loss: 14.854 - mae: 38.617 - mean_q: 
50.008 Interval 957 (478000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0434 4 episodes - episode_reward: -111.174 [-306.320, 231.510] - loss: 11.637 - mae: 38.879 - mean_q: 50.755 Interval 958 (478500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0261 1 episodes - episode_reward: -63.783 [-63.783, -63.783] - loss: 10.310 - mae: 38.801 - mean_q: 50.491 Interval 959 (479000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0564 2 episodes - episode_reward: 66.087 [-80.736, 212.910] - loss: 10.929 - mae: 38.654 - mean_q: 50.318 Interval 960 (479500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6566 1 episodes - episode_reward: 233.110 [233.110, 233.110] - loss: 11.154 - mae: 38.825 - mean_q: 50.546 Interval 961 (480000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0324 Interval 962 (480500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1532 1 episodes - episode_reward: 175.165 [175.165, 175.165] - loss: 12.325 - mae: 39.049 - mean_q: 50.866 Interval 963 (481000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1757 Interval 964 (481500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0519 Interval 965 (482000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0690 Interval 966 (482500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0692 Interval 967 (483000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0762 Interval 968 (483500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.0226 Interval 969 (484000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4246 1 episodes - episode_reward: 42.877 
[42.877, 42.877] - loss: 13.220 - mae: 39.825 - mean_q: 51.865 Interval 970 (484500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2204 2 episodes - episode_reward: 123.743 [42.387, 205.099] - loss: 12.502 - mae: 39.957 - mean_q: 52.239 Interval 971 (485000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1597 2 episodes - episode_reward: -68.119 [-71.598, -64.639] - loss: 13.363 - mae: 40.183 - mean_q: 52.553 Interval 972 (485500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1205 Interval 973 (486000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0599 Interval 974 (486500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0880 Interval 975 (487000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.0746 1 episodes - episode_reward: 121.040 [121.040, 121.040] - loss: 13.404 - mae: 40.565 - mean_q: 53.457 Interval 976 (487500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9726 2 episodes - episode_reward: -302.565 [-385.077, -220.052] - loss: 10.846 - mae: 40.606 - mean_q: 53.866 Interval 977 (488000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4261 5 episodes - episode_reward: -126.337 [-203.130, -61.488] - loss: 9.618 - mae: 40.857 - mean_q: 54.062 Interval 978 (488500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2176 Interval 979 (489000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0406 Interval 980 (489500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7334 3 episodes - episode_reward: -130.732 [-272.098, 144.051] - loss: 16.591 - mae: 41.477 - mean_q: 54.348 Interval 981 (490000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 
0.0833 Interval 982 (490500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1809 2 episodes - episode_reward: 57.402 [-109.876, 224.679] - loss: 10.223 - mae: 41.495 - mean_q: 54.106 Interval 983 (491000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5450 3 episodes - episode_reward: -75.512 [-341.979, 215.443] - loss: 13.039 - mae: 41.368 - mean_q: 54.147 Interval 984 (491500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0652 1 episodes - episode_reward: 206.938 [206.938, 206.938] - loss: 9.983 - mae: 41.085 - mean_q: 53.822 Interval 985 (492000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0645 3 episodes - episode_reward: -69.258 [-328.715, 266.199] - loss: 11.379 - mae: 41.120 - mean_q: 53.681 Interval 986 (492500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1722 Interval 987 (493000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7061 3 episodes - episode_reward: -95.683 [-305.628, 215.920] - loss: 12.641 - mae: 40.083 - mean_q: 52.142 Interval 988 (493500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2031 Interval 989 (494000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3389 1 episodes - episode_reward: 278.049 [278.049, 278.049] - loss: 11.039 - mae: 39.506 - mean_q: 50.982 Interval 990 (494500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0077 Interval 991 (495000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0523 Interval 992 (495500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4140 1 episodes - episode_reward: 145.500 [145.500, 145.500] - loss: 10.646 - mae: 38.237 - mean_q: 49.413 Interval 993 (496000 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: 0.4413 1 episodes - episode_reward: 204.989 [204.989, 204.989] - loss: 13.062 - mae: 38.670 - mean_q: 49.557 Interval 994 (496500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1801 2 episodes - episode_reward: 14.633 [-202.489, 231.756] - loss: 10.235 - mae: 38.739 - mean_q: 49.494 Interval 995 (497000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3981 2 episodes - episode_reward: -178.715 [-257.429, -100.000] - loss: 13.293 - mae: 39.013 - mean_q: 49.734 Interval 996 (497500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6161 1 episodes - episode_reward: 258.732 [258.732, 258.732] - loss: 10.009 - mae: 38.522 - mean_q: 48.828 Interval 997 (498000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0740 4 episodes - episode_reward: 13.922 [-150.300, 310.329] - loss: 13.022 - mae: 38.710 - mean_q: 48.826 Interval 998 (498500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2597 1 episodes - episode_reward: 213.697 [213.697, 213.697] - loss: 12.632 - mae: 38.368 - mean_q: 48.414 Interval 999 (499000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0353 Interval 1000 (499500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0666 Interval 1001 (500000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6375 3 episodes - episode_reward: -110.092 [-301.053, 71.373] - loss: 10.578 - mae: 38.764 - mean_q: 48.715 Interval 1002 (500500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0891A: 0s Interval 1003 (501000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2230 2 episodes - episode_reward: 89.682 [24.766, 154.599] - loss: 13.115 - mae: 38.952 - mean_q: 49.237 
Interval 1004 (501500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0477 2 episodes - episode_reward: -36.633 [-195.974, 122.707] - loss: 11.173 - mae: 39.093 - mean_q: 48.959 Interval 1005 (502000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.1898 1 episodes - episode_reward: 181.648 [181.648, 181.648] - loss: 11.627 - mae: 39.095 - mean_q: 49.027 Interval 1006 (502500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3232 2 episodes - episode_reward: 26.335 [-164.998, 217.669] - loss: 11.791 - mae: 39.247 - mean_q: 49.172 Interval 1007 (503000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1181 Interval 1008 (503500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0208 Interval 1009 (504000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5186 1 episodes - episode_reward: 239.979 [239.979, 239.979] - loss: 10.481 - mae: 39.692 - mean_q: 49.894 Interval 1010 (504500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8428 3 episodes - episode_reward: -107.457 [-371.473, 259.207] - loss: 11.270 - mae: 39.333 - mean_q: 49.345 Interval 1011 (505000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4800 1 episodes - episode_reward: 201.928 [201.928, 201.928] - loss: 16.860 - mae: 39.229 - mean_q: 49.873 Interval 1012 (505500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4315e-04 2 episodes - episode_reward: 64.483 [-113.742, 242.708] - loss: 8.081 - mae: 38.707 - mean_q: 49.009 Interval 1013 (506000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.7950 1 episodes - episode_reward: 185.000 [185.000, 185.000] - loss: 9.893 - mae: 38.562 - mean_q: 48.511 Interval 1014 (506500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: 0.6722 2 episodes - episode_reward: 248.035 [209.300, 286.770] - loss: 9.141 - mae: 38.231 - mean_q: 47.980 Interval 1015 (507000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3685 1 episodes - episode_reward: 213.814 [213.814, 213.814] - loss: 11.030 - mae: 38.382 - mean_q: 48.238 Interval 1016 (507500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2164 1 episodes - episode_reward: -212.805 [-212.805, -212.805] - loss: 10.232 - mae: 38.334 - mean_q: 48.836 Interval 1017 (508000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5528 2 episodes - episode_reward: 186.534 [176.277, 196.791] - loss: 13.818 - mae: 37.990 - mean_q: 47.768 Interval 1018 (508500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0578 1 episodes - episode_reward: -75.903 [-75.903, -75.903] - loss: 10.847 - mae: 38.142 - mean_q: 47.630 Interval 1019 (509000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8871 2 episodes - episode_reward: -447.778 [-586.708, -308.847] - loss: 11.527 - mae: 38.146 - mean_q: 48.116 Interval 1020 (509500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2889 Interval 1021 (510000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1712 2 episodes - episode_reward: 111.878 [-15.076, 238.832] - loss: 9.781 - mae: 37.850 - mean_q: 47.493 Interval 1022 (510500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0438 1 episodes - episode_reward: -39.696 [-39.696, -39.696] - loss: 11.670 - mae: 38.418 - mean_q: 48.494 Interval 1023 (511000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6797 2 episodes - episode_reward: -188.286 [-236.211, -140.362] - loss: 12.847 - mae: 38.429 - mean_q: 48.746 
Interval 1024 (511500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.8999 2 episodes - episode_reward: 241.615 [203.062, 280.167] - loss: 10.587 - mae: 38.457 - mean_q: 48.987 Interval 1025 (512000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1209 1 episodes - episode_reward: 11.469 [11.469, 11.469] - loss: 11.590 - mae: 39.280 - mean_q: 49.829 Interval 1026 (512500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1680 2 episodes - episode_reward: 9.883 [-210.260, 230.026] - loss: 14.548 - mae: 39.296 - mean_q: 49.776 Interval 1027 (513000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0288 5 episodes - episode_reward: -99.051 [-166.915, 36.878] - loss: 12.019 - mae: 39.449 - mean_q: 50.069 Interval 1028 (513500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0361 2 episodes - episode_reward: -45.422 [-59.642, -31.202] - loss: 16.393 - mae: 39.792 - mean_q: 50.397 Interval 1029 (514000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1422 Interval 1030 (514500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0895 2 episodes - episode_reward: 137.321 [-1.662, 276.305] - loss: 15.074 - mae: 39.634 - mean_q: 49.691 Interval 1031 (515000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8577 2 episodes - episode_reward: -229.825 [-296.346, -163.304] - loss: 17.103 - mae: 39.603 - mean_q: 49.270 Interval 1032 (515500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0182 Interval 1033 (516000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1084 Interval 1034 (516500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4707 1 episodes - episode_reward: 121.279 [121.279, 121.279] - 
loss: 13.939 - mae: 39.802 - mean_q: 49.254 Interval 1035 (517000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1442 Interval 1036 (517500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2953 2 episodes - episode_reward: -31.774 [-246.234, 182.685] - loss: 18.448 - mae: 39.398 - mean_q: 48.992 Interval 1037 (518000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2337 Interval 1038 (518500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.1706 Interval 1039 (519000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1485 1 episodes - episode_reward: 55.905 [55.905, 55.905] - loss: 14.950 - mae: 39.437 - mean_q: 49.248 Interval 1040 (519500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1798 2 episodes - episode_reward: 36.443 [-133.375, 206.260] - loss: 11.756 - mae: 39.552 - mean_q: 49.473 Interval 1041 (520000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0694 Interval 1042 (520500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3584 1 episodes - episode_reward: 127.051 [127.051, 127.051] - loss: 12.990 - mae: 38.889 - mean_q: 48.890 Interval 1043 (521000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4622 1 episodes - episode_reward: 209.630 [209.630, 209.630] - loss: 13.427 - mae: 39.160 - mean_q: 49.246 Interval 1044 (521500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1420 Interval 1045 (522000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3325 3 episodes - episode_reward: -46.479 [-223.139, 139.557] - loss: 11.410 - mae: 39.931 - mean_q: 50.053 Interval 1046 (522500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0159 1 
episodes - episode_reward: -88.073 [-88.073, -88.073] - loss: 12.722 - mae: 40.088 - mean_q: 50.532 Interval 1047 (523000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7119 1 episodes - episode_reward: -294.865 [-294.865, -294.865] - loss: 10.005 - mae: 40.079 - mean_q: 50.422 Interval 1048 (523500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2460 3 episodes - episode_reward: -375.855 [-601.659, -234.889] - loss: 13.730 - mae: 40.028 - mean_q: 49.817 Interval 1049 (524000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1786 1 episodes - episode_reward: 114.691 [114.691, 114.691] - loss: 13.368 - mae: 40.837 - mean_q: 50.807 Interval 1050 (524500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0829 Interval 1051 (525000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2166 2 episodes - episode_reward: 37.467 [-100.229, 175.164] - loss: 12.430 - mae: 41.670 - mean_q: 51.997 Interval 1052 (525500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0317 Interval 1053 (526000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3992 2 episodes - episode_reward: 78.054 [-26.479, 182.587] - loss: 16.224 - mae: 42.517 - mean_q: 53.159 Interval 1054 (526500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6023 1 episodes - episode_reward: 289.451 [289.451, 289.451] - loss: 14.487 - mae: 43.140 - mean_q: 53.620 Interval 1055 (527000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0761 Interval 1056 (527500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.0652 Interval 1057 (528000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4745 1 episodes - episode_reward: 232.535 [232.535, 232.535] - 
loss: 12.092 - mae: 43.255 - mean_q: 54.081 Interval 1058 (528500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1056 3 episodes - episode_reward: 23.170 [-89.303, 238.431] - loss: 15.618 - mae: 43.534 - mean_q: 54.665 Interval 1059 (529000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1518 Interval 1060 (529500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0842 3 episodes - episode_reward: -3.801 [-123.371, 192.825] - loss: 15.076 - mae: 43.921 - mean_q: 54.533 Interval 1061 (530000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1422 Interval 1062 (530500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0624 Interval 1063 (531000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.5786 1 episodes - episode_reward: 122.130 [122.130, 122.130] - loss: 12.079 - mae: 44.718 - mean_q: 56.044 Interval 1064 (531500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2378 2 episodes - episode_reward: 101.904 [-64.872, 268.681] - loss: 31.343 - mae: 44.904 - mean_q: 56.432 Interval 1065 (532000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2420 2 episodes - episode_reward: 20.159 [-176.539, 216.857] - loss: 19.534 - mae: 45.191 - mean_q: 57.007 Interval 1066 (532500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3039 Interval 1067 (533000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0677 Interval 1068 (533500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.5356 1 episodes - episode_reward: -281.615 [-281.615, -281.615] - loss: 14.445 - mae: 45.454 - mean_q: 56.902 Interval 1069 (534000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1037 1 
episodes - episode_reward: -30.641 [-30.641, -30.641] - loss: 17.989 - mae: 45.859 - mean_q: 57.094 Interval 1070 (534500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0495 1 episodes - episode_reward: -121.515 [-121.515, -121.515] - loss: 12.758 - mae: 45.833 - mean_q: 57.049 Interval 1071 (535000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3947 1 episodes - episode_reward: 268.726 [268.726, 268.726] - loss: 15.976 - mae: 45.705 - mean_q: 56.637 Interval 1072 (535500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.7601 1 episodes - episode_reward: 257.364 [257.364, 257.364] - loss: 14.391 - mae: 45.957 - mean_q: 57.157 Interval 1073 (536000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4305 1 episodes - episode_reward: 277.510 [277.510, 277.510] - loss: 14.287 - mae: 45.860 - mean_q: 57.300 Interval 1074 (536500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3744 1 episodes - episode_reward: 307.636 [307.636, 307.636] - loss: 13.463 - mae: 46.185 - mean_q: 57.302 Interval 1075 (537000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4247 1 episodes - episode_reward: -393.101 [-393.101, -393.101] - loss: 14.155 - mae: 46.572 - mean_q: 57.916 Interval 1076 (537500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2332 1 episodes - episode_reward: 234.098 [234.098, 234.098] - loss: 11.914 - mae: 46.078 - mean_q: 57.728 Interval 1077 (538000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3347 3 episodes - episode_reward: -87.177 [-163.477, 8.080] - loss: 11.585 - mae: 45.792 - mean_q: 57.664 Interval 1078 (538500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3202 3 episodes - episode_reward: 10.685 [-127.551, 227.130] - loss: 15.851 - 
mae: 45.514 - mean_q: 57.507 Interval 1079 (539000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0335 2 episodes - episode_reward: -122.451 [-170.810, -74.092] - loss: 11.430 - mae: 45.961 - mean_q: 57.384 Interval 1080 (539500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1802 1 episodes - episode_reward: 254.031 [254.031, 254.031] - loss: 14.435 - mae: 45.677 - mean_q: 56.816 Interval 1081 (540000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2099 1 episodes - episode_reward: -207.806 [-207.806, -207.806] - loss: 13.419 - mae: 45.834 - mean_q: 57.552 Interval 1082 (540500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.7357 2 episodes - episode_reward: 208.206 [198.023, 218.389] - loss: 14.920 - mae: 46.305 - mean_q: 57.493 Interval 1083 (541000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0835 Interval 1084 (541500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.7987 2 episodes - episode_reward: 224.175 [186.536, 261.814] - loss: 10.736 - mae: 46.392 - mean_q: 57.723 Interval 1085 (542000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0370 3 episodes - episode_reward: -24.207 [-118.538, 148.414] - loss: 11.071 - mae: 46.307 - mean_q: 57.788 Interval 1086 (542500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5129 1 episodes - episode_reward: 242.732 [242.732, 242.732] - loss: 12.061 - mae: 46.726 - mean_q: 58.315 Interval 1087 (543000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2769 1 episodes - episode_reward: 242.697 [242.697, 242.697] - loss: 13.198 - mae: 47.244 - mean_q: 58.711 Interval 1088 (543500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1392 Interval 1089 (544000 steps 
performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8002 2 episodes - episode_reward: -238.494 [-289.837, -187.151] - loss: 15.949 - mae: 47.636 - mean_q: 59.429 Interval 1090 (544500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2341 1 episodes - episode_reward: -132.840 [-132.840, -132.840] - loss: 11.384 - mae: 47.898 - mean_q: 60.572 Interval 1091 (545000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5498 2 episodes - episode_reward: -98.641 [-199.668, 2.387] - loss: 18.710 - mae: 48.479 - mean_q: 60.978 Interval 1092 (545500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0831 Interval 1093 (546000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1638 1 episodes - episode_reward: -188.108 [-188.108, -188.108] - loss: 12.801 - mae: 48.082 - mean_q: 59.849 Interval 1094 (546500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4786 3 episodes - episode_reward: -198.348 [-361.833, -115.072] - loss: 13.186 - mae: 48.564 - mean_q: 60.459 Interval 1095 (547000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3126 Interval 1096 (547500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3415 1 episodes - episode_reward: -538.581 [-538.581, -538.581] - loss: 16.384 - mae: 48.187 - mean_q: 59.306 Interval 1097 (548000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0470 1 episodes - episode_reward: 216.443 [216.443, 216.443] - loss: 11.239 - mae: 48.137 - mean_q: 59.783 Interval 1098 (548500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2034 Interval 1099 (549000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1575 Interval 1100 (549500 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -1.5196 1 episodes - episode_reward: -1152.426 [-1152.426, -1152.426] - loss: 11.118 - mae: 47.854 - mean_q: 59.108 Interval 1101 (550000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 4.5268e-04 Interval 1102 (550500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2053 1 episodes - episode_reward: 187.351 [187.351, 187.351] - loss: 13.112 - mae: 47.643 - mean_q: 59.867 Interval 1103 (551000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3229 Interval 1104 (551500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1874 Interval 1105 (552000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0354 Interval 1106 (552500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1704 Interval 1107 (553000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1895 Interval 1108 (553500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1651 Interval 1109 (554000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1707 Interval 1110 (554500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.1778 Interval 1111 (555000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.2181 Interval 1112 (555500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1457 Interval 1113 (556000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1521 Interval 1114 (556500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2293 Interval 1115 (557000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.2459 Interval 1116 (557500 steps 
performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1712 Interval 1117 (558000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.2138 Interval 1118 (558500 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.1179 Interval 1119 (559000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1734 Interval 1120 (559500 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1629 Interval 1121 (560000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.0382 Interval 1122 (560500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: 0.5049 1 episodes - episode_reward: -1461.976 [-1461.976, -1461.976] - loss: 12.756 - mae: 51.057 - mean_q: 65.229 Interval 1123 (561000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2128 1 episodes - episode_reward: 132.074 [132.074, 132.074] - loss: 7.530 - mae: 51.198 - mean_q: 65.697 Interval 1124 (561500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3961 3 episodes - episode_reward: -220.371 [-282.099, -164.410] - loss: 10.510 - mae: 50.758 - mean_q: 64.764 Interval 1125 (562000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5655 2 episodes - episode_reward: -169.566 [-240.401, -98.731] - loss: 14.347 - mae: 51.125 - mean_q: 65.237 Interval 1126 (562500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3345 1 episodes - episode_reward: -228.259 [-228.259, -228.259] - loss: 11.814 - mae: 51.056 - mean_q: 65.011 Interval 1127 (563000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1473 Interval 1128 (563500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1183 Interval 1129 (564000 steps performed) 
500/500 [==============================] - 5s 9ms/step - reward: -0.1773 Interval 1130 (564500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2678 Interval 1131 (565000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.1861 1 episodes - episode_reward: -85.496 [-85.496, -85.496] - loss: 9.787 - mae: 50.467 - mean_q: 64.326 Interval 1132 (565500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1978 Interval 1133 (566000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4118 1 episodes - episode_reward: 73.300 [73.300, 73.300] - loss: 9.605 - mae: 50.697 - mean_q: 64.667 Interval 1134 (566500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0660 2 episodes - episode_reward: -33.125 [-88.028, 21.777] - loss: 12.486 - mae: 50.601 - mean_q: 64.735 Interval 1135 (567000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1623 Interval 1136 (567500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4012 4 episodes - episode_reward: -25.496 [-103.737, 187.434] - loss: 8.622 - mae: 50.960 - mean_q: 65.267 Interval 1137 (568000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0776 Interval 1138 (568500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1136 2 episodes - episode_reward: -15.474 [-111.029, 80.081] - loss: 11.991 - mae: 50.181 - mean_q: 64.271 Interval 1139 (569000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0511 1 episodes - episode_reward: -42.218 [-42.218, -42.218] - loss: 11.221 - mae: 49.740 - mean_q: 62.876 Interval 1140 (569500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0098 Interval 1141 (570000 steps performed) 500/500 [==============================] - 4s 7ms/step 
- reward: 0.4966 1 episodes - episode_reward: 223.099 [223.099, 223.099] - loss: 10.375 - mae: 48.942 - mean_q: 61.866 Interval 1142 (570500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0065 Interval 1143 (571000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2937 3 episodes - episode_reward: 102.375 [-173.533, 251.684] - loss: 11.858 - mae: 48.274 - mean_q: 60.693 Interval 1144 (571500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0029 1 episodes - episode_reward: -85.257 [-85.257, -85.257] - loss: 9.698 - mae: 47.888 - mean_q: 60.030 Interval 1145 (572000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0353 Interval 1146 (572500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.2959 1 episodes - episode_reward: 172.705 [172.705, 172.705] - loss: 9.189 - mae: 47.988 - mean_q: 60.301 Interval 1147 (573000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0108 1 episodes - episode_reward: -81.187 [-81.187, -81.187] - loss: 9.631 - mae: 47.489 - mean_q: 59.519 Interval 1148 (573500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0299 Interval 1149 (574000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0385 Interval 1150 (574500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1343 2 episodes - episode_reward: 51.905 [-77.934, 181.744] - loss: 10.075 - mae: 46.284 - mean_q: 59.048 Interval 1151 (575000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1463 Interval 1152 (575500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8301 4 episodes - episode_reward: -79.591 [-218.342, 216.366] - loss: 10.282 - mae: 46.120 - mean_q: 58.767 Interval 1153 (576000 steps performed) 
500/500 [==============================] - 4s 7ms/step - reward: -0.0781 2 episodes - episode_reward: -43.439 [-50.799, -36.078] - loss: 12.163 - mae: 45.430 - mean_q: 58.067 Interval 1154 (576500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3834 1 episodes - episode_reward: 193.749 [193.749, 193.749] - loss: 16.011 - mae: 45.642 - mean_q: 58.464 Interval 1155 (577000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5864 6 episodes - episode_reward: -124.847 [-307.926, 170.745] - loss: 16.368 - mae: 45.734 - mean_q: 58.374 Interval 1156 (577500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4916 4 episodes - episode_reward: -190.947 [-397.951, -21.996] - loss: 15.835 - mae: 45.780 - mean_q: 58.617 Interval 1157 (578000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3329 7 episodes - episode_reward: -85.941 [-138.501, 10.025] - loss: 19.483 - mae: 45.381 - mean_q: 57.357 Interval 1158 (578500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4678 2 episodes - episode_reward: -136.114 [-157.334, -114.893] - loss: 16.640 - mae: 45.037 - mean_q: 56.852 Interval 1159 (579000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4868 3 episodes - episode_reward: -108.694 [-162.920, -66.996] - loss: 18.718 - mae: 44.866 - mean_q: 57.620 Interval 1160 (579500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4950 2 episodes - episode_reward: -74.604 [-451.199, 301.991] - loss: 21.818 - mae: 44.922 - mean_q: 57.581 Interval 1161 (580000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6098 2 episodes - episode_reward: -179.185 [-288.521, -69.850] - loss: 19.957 - mae: 44.243 - mean_q: 56.416 Interval 1162 (580500 steps performed) 500/500 [==============================] - 4s 7ms/step - 
reward: -0.0619 Interval 1163 (581000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3798 1 episodes - episode_reward: -260.878 [-260.878, -260.878] - loss: 16.714 - mae: 43.004 - mean_q: 54.288 Interval 1164 (581500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3541 1 episodes - episode_reward: 183.609 [183.609, 183.609] - loss: 16.804 - mae: 42.289 - mean_q: 53.691 Interval 1165 (582000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1071 Interval 1166 (582500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0465 Interval 1167 (583000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0191 Interval 1168 (583500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1008 Interval 1169 (584000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.0556 1 episodes - episode_reward: 78.476 [78.476, 78.476] - loss: 12.963 - mae: 40.903 - mean_q: 51.227 Interval 1170 (584500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0506 1 episodes - episode_reward: -163.692 [-163.692, -163.692] - loss: 16.892 - mae: 41.002 - mean_q: 50.980 Interval 1171 (585000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4032 1 episodes - episode_reward: 188.941 [188.941, 188.941] - loss: 20.809 - mae: 40.515 - mean_q: 49.891 Interval 1172 (585500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2842 1 episodes - episode_reward: 203.249 [203.249, 203.249] - loss: 16.450 - mae: 40.117 - mean_q: 49.928 Interval 1173 (586000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2746 1 episodes - episode_reward: 229.682 [229.682, 229.682] - loss: 14.351 - mae: 39.745 - mean_q: 49.222 Interval 1174 (586500 steps performed) 
500/500 [==============================] - 4s 7ms/step - reward: -0.3876 1 episodes - episode_reward: -169.293 [-169.293, -169.293] - loss: 15.154 - mae: 39.629 - mean_q: 49.257 Interval 1175 (587000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0212 1 episodes - episode_reward: -147.756 [-147.756, -147.756] - loss: 14.089 - mae: 39.616 - mean_q: 49.268 Interval 1176 (587500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0091 Interval 1177 (588000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0151 Interval 1178 (588500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0827 2 episodes - episode_reward: 0.151 [-182.544, 182.846] - loss: 12.131 - mae: 39.033 - mean_q: 48.672 Interval 1179 (589000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3196 1 episodes - episode_reward: 225.481 [225.481, 225.481] - loss: 10.295 - mae: 38.886 - mean_q: 48.058 Interval 1180 (589500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3156 1 episodes - episode_reward: -144.079 [-144.079, -144.079] - loss: 16.531 - mae: 38.367 - mean_q: 47.827 Interval 1181 (590000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4937 1 episodes - episode_reward: 207.154 [207.154, 207.154] - loss: 12.589 - mae: 38.404 - mean_q: 47.952 Interval 1182 (590500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7152 2 episodes - episode_reward: -144.225 [-197.774, -90.675] - loss: 13.127 - mae: 38.304 - mean_q: 47.536 Interval 1183 (591000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2413 1 episodes - episode_reward: -123.637 [-123.637, -123.637] - loss: 13.802 - mae: 37.963 - mean_q: 47.624 Interval 1184 (591500 steps performed) 500/500 [==============================] - 4s 7ms/step 
- reward: -0.2459 1 episodes - episode_reward: -106.553 [-106.553, -106.553] - loss: 14.299 - mae: 38.134 - mean_q: 47.151 Interval 1185 (592000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2302 4 episodes - episode_reward: -26.970 [-123.354, 149.483] - loss: 14.925 - mae: 38.202 - mean_q: 47.473 Interval 1186 (592500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3791 1 episodes - episode_reward: 184.683 [184.683, 184.683] - loss: 14.333 - mae: 38.178 - mean_q: 47.506 Interval 1187 (593000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3152 2 episodes - episode_reward: -92.933 [-111.140, -74.726] - loss: 13.607 - mae: 37.479 - mean_q: 47.178 Interval 1188 (593500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4571 2 episodes - episode_reward: -104.361 [-118.246, -90.476] - loss: 15.268 - mae: 37.977 - mean_q: 47.691 Interval 1189 (594000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0049 Interval 1190 (594500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0258 3 episodes - episode_reward: -14.562 [-109.381, 130.604] - loss: 12.849 - mae: 36.910 - mean_q: 46.514 Interval 1191 (595000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1473 Interval 1192 (595500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0667 1 episodes - episode_reward: -187.802 [-187.802, -187.802] - loss: 13.826 - mae: 37.270 - mean_q: 46.792 Interval 1193 (596000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.6341 2 episodes - episode_reward: 207.730 [175.743, 239.716] - loss: 14.943 - mae: 37.226 - mean_q: 46.592 Interval 1194 (596500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3100 Interval 1195 (597000 steps 
performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6410 2 episodes - episode_reward: -97.017 [-466.755, 272.721] - loss: 12.266 - mae: 36.933 - mean_q: 46.742 Interval 1196 (597500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0961 2 episodes - episode_reward: 26.244 [-100.000, 152.489] - loss: 12.668 - mae: 36.643 - mean_q: 46.212 Interval 1197 (598000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0885 Interval 1198 (598500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3180 3 episodes - episode_reward: -24.001 [-137.847, 169.226] - loss: 11.871 - mae: 36.825 - mean_q: 46.200 Interval 1199 (599000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0978 1 episodes - episode_reward: -113.509 [-113.509, -113.509] - loss: 13.321 - mae: 36.750 - mean_q: 45.859 Interval 1200 (599500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0675 3 episodes - episode_reward: 60.101 [-50.879, 139.025] - loss: 14.350 - mae: 36.508 - mean_q: 46.150 Interval 1201 (600000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2090 1 episodes - episode_reward: -292.186 [-292.186, -292.186] - loss: 12.092 - mae: 36.545 - mean_q: 45.706 Interval 1202 (600500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2155 1 episodes - episode_reward: 202.506 [202.506, 202.506] - loss: 11.283 - mae: 36.383 - mean_q: 46.080 Interval 1203 (601000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4684 1 episodes - episode_reward: 152.179 [152.179, 152.179] - loss: 12.246 - mae: 36.802 - mean_q: 46.501 Interval 1204 (601500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1355 1 episodes - episode_reward: 258.177 [258.177, 258.177] - loss: 14.975 - mae: 37.218 - 
mean_q: 46.650 Interval 1205 (602000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.5132 4 episodes - episode_reward: -456.312 [-1016.885, -171.905] - loss: 12.358 - mae: 37.348 - mean_q: 46.649 Interval 1206 (602500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0079 1 episodes - episode_reward: -94.025 [-94.025, -94.025] - loss: 11.575 - mae: 37.496 - mean_q: 46.639 Interval 1207 (603000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5234 3 episodes - episode_reward: -59.327 [-192.440, 175.006] - loss: 15.047 - mae: 37.738 - mean_q: 46.810 Interval 1208 (603500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0757 Interval 1209 (604000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2701 2 episodes - episode_reward: -57.947 [-255.890, 139.996] - loss: 16.345 - mae: 38.201 - mean_q: 47.137 Interval 1210 (604500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4847 1 episodes - episode_reward: 175.934 [175.934, 175.934] - loss: 14.191 - mae: 38.203 - mean_q: 47.520 Interval 1211 (605000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0891 Interval 1212 (605500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1730 1 episodes - episode_reward: 203.132 [203.132, 203.132] - loss: 14.610 - mae: 38.508 - mean_q: 47.606 Interval 1213 (606000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5297 3 episodes - episode_reward: -265.485 [-558.417, -109.147] - loss: 13.631 - mae: 39.080 - mean_q: 48.404 Interval 1214 (606500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1358 2 episodes - episode_reward: -40.083 [-228.565, 148.398] - loss: 17.155 - mae: 39.172 - mean_q: 48.337 Interval 1215 (607000 steps performed) 
500/500 [==============================] - 4s 8ms/step - reward: 0.3282 1 episodes - episode_reward: 210.012 [210.012, 210.012] - loss: 11.164 - mae: 39.628 - mean_q: 49.439 Interval 1216 (607500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4956 1 episodes - episode_reward: -293.625 [-293.625, -293.625] - loss: 14.562 - mae: 39.650 - mean_q: 48.830 Interval 1217 (608000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0933 1 episodes - episode_reward: -30.659 [-30.659, -30.659] - loss: 12.951 - mae: 39.802 - mean_q: 48.787 Interval 1218 (608500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0276 1 episodes - episode_reward: 139.756 [139.756, 139.756] - loss: 16.531 - mae: 39.690 - mean_q: 47.792 Interval 1219 (609000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4513 1 episodes - episode_reward: -288.741 [-288.741, -288.741] - loss: 14.618 - mae: 39.686 - mean_q: 48.308 Interval 1220 (609500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.1519 Interval 1221 (610000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.0079 2 episodes - episode_reward: 6.744 [-95.300, 108.789] - loss: 16.256 - mae: 39.620 - mean_q: 47.335 Interval 1222 (610500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8839 4 episodes - episode_reward: -249.475 [-509.808, -133.524] - loss: 13.864 - mae: 39.176 - mean_q: 47.036 Interval 1223 (611000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0622 Interval 1224 (611500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1743 1 episodes - episode_reward: 127.903 [127.903, 127.903] - loss: 14.293 - mae: 39.218 - mean_q: 46.730 Interval 1225 (612000 steps performed) 500/500 [==============================] - 4s 9ms/step - 
reward: 0.0989 Interval 1226 (612500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.1572 1 episodes - episode_reward: 111.301 [111.301, 111.301] - loss: 15.067 - mae: 39.870 - mean_q: 47.361 Interval 1227 (613000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2095 Interval 1228 (613500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8213 1 episodes - episode_reward: -524.435 [-524.435, -524.435] - loss: 18.749 - mae: 39.940 - mean_q: 47.816 Interval 1229 (614000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2499 1 episodes - episode_reward: -100.769 [-100.769, -100.769] - loss: 13.325 - mae: 40.085 - mean_q: 48.274 Interval 1230 (614500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1079 1 episodes - episode_reward: -63.371 [-63.371, -63.371] - loss: 14.680 - mae: 40.389 - mean_q: 48.786 Interval 1231 (615000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2967 1 episodes - episode_reward: 127.278 [127.278, 127.278] - loss: 15.206 - mae: 40.694 - mean_q: 48.510 Interval 1232 (615500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3608 3 episodes - episode_reward: -62.566 [-262.724, 200.948] - loss: 18.489 - mae: 40.604 - mean_q: 48.285 Interval 1233 (616000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0113 Interval 1234 (616500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0524 Interval 1235 (617000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0316 Interval 1236 (617500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.2197 1 episodes - episode_reward: 147.018 [147.018, 147.018] - loss: 20.457 - mae: 40.945 - mean_q: 49.530 Interval 1237 (618000 steps 
performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1066 Interval 1238 (618500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2916 2 episodes - episode_reward: -109.855 [-147.721, -71.989] - loss: 16.026 - mae: 41.531 - mean_q: 50.929 Interval 1239 (619000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1361 Interval 1240 (619500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0449 4 episodes - episode_reward: 32.105 [-103.828, 203.845] - loss: 14.393 - mae: 42.155 - mean_q: 52.184 Interval 1241 (620000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3857 4 episodes - episode_reward: -162.723 [-225.963, -85.882] - loss: 59.101 - mae: 42.231 - mean_q: 51.633 Interval 1242 (620500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2465 2 episodes - episode_reward: -121.235 [-140.109, -102.360] - loss: 12.950 - mae: 42.122 - mean_q: 51.731 Interval 1243 (621000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0181 2 episodes - episode_reward: 46.698 [-97.689, 191.084] - loss: 20.010 - mae: 42.729 - mean_q: 51.999 Interval 1244 (621500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3509 1 episodes - episode_reward: 150.818 [150.818, 150.818] - loss: 15.817 - mae: 42.777 - mean_q: 51.884 Interval 1245 (622000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2710 Interval 1246 (622500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6306 1 episodes - episode_reward: -215.004 [-215.004, -215.004] - loss: 15.565 - mae: 43.228 - mean_q: 52.675 Interval 1247 (623000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1608 1 episodes - episode_reward: -44.359 [-44.359, -44.359] - loss: 14.911 - mae: 
44.186 - mean_q: 53.595 Interval 1248 (623500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0596 Interval 1249 (624000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3534 1 episodes - episode_reward: 223.474 [223.474, 223.474] - loss: 40.088 - mae: 44.986 - mean_q: 54.458 Interval 1250 (624500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4935 1 episodes - episode_reward: -44.254 [-44.254, -44.254] - loss: 22.748 - mae: 44.540 - mean_q: 54.512 Interval 1251 (625000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3605 2 episodes - episode_reward: -397.015 [-551.071, -242.959] - loss: 14.701 - mae: 44.910 - mean_q: 54.553 Interval 1252 (625500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2224 Interval 1253 (626000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0359 Interval 1254 (626500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.7304 3 episodes - episode_reward: 141.735 [-113.477, 355.021] - loss: 13.941 - mae: 45.767 - mean_q: 56.031 Interval 1255 (627000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2147 1 episodes - episode_reward: 154.491 [154.491, 154.491] - loss: 13.160 - mae: 46.119 - mean_q: 57.054 Interval 1256 (627500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4925 1 episodes - episode_reward: 274.521 [274.521, 274.521] - loss: 16.384 - mae: 46.580 - mean_q: 57.166 Interval 1257 (628000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3512 2 episodes - episode_reward: -111.059 [-134.913, -87.206] - loss: 16.931 - mae: 46.959 - mean_q: 58.384 Interval 1258 (628500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2257 1 episodes - episode_reward: 
-66.079 [-66.079, -66.079] - loss: 14.773 - mae: 47.441 - mean_q: 58.887 Interval 1259 (629000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0737 Interval 1260 (629500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4062 1 episodes - episode_reward: 92.302 [92.302, 92.302] - loss: 12.281 - mae: 47.393 - mean_q: 59.380 Interval 1261 (630000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0255 Interval 1262 (630500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0183 Interval 1263 (631000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4741 1 episodes - episode_reward: 210.559 [210.559, 210.559] - loss: 11.147 - mae: 47.723 - mean_q: 59.400 Interval 1264 (631500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2065 1 episodes - episode_reward: 234.742 [234.742, 234.742] - loss: 13.012 - mae: 47.852 - mean_q: 59.408 Interval 1265 (632000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0146 Interval 1266 (632500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1820 1 episodes - episode_reward: 113.814 [113.814, 113.814] - loss: 17.383 - mae: 48.397 - mean_q: 59.567 Interval 1267 (633000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0186 Interval 1268 (633500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.0292 Interval 1269 (634000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.2838 1 episodes - episode_reward: 84.826 [84.826, 84.826] - loss: 10.548 - mae: 49.173 - mean_q: 61.688 Interval 1270 (634500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4167 1 episodes - episode_reward: 228.790 [228.790, 228.790] - loss: 13.771 - mae: 49.457 - 
mean_q: 61.864 Interval 1271 (635000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0917 1 episodes - episode_reward: -49.603 [-49.603, -49.603] - loss: 16.232 - mae: 49.410 - mean_q: 62.599 Interval 1272 (635500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0617 Interval 1273 (636000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1413 Interval 1274 (636500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0411 Interval 1275 (637000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0448 Interval 1276 (637500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2424 1 episodes - episode_reward: -13.836 [-13.836, -13.836] - loss: 10.481 - mae: 49.416 - mean_q: 62.368 Interval 1277 (638000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.6569 1 episodes - episode_reward: 182.273 [182.273, 182.273] - loss: 13.035 - mae: 49.328 - mean_q: 62.481 Interval 1278 (638500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0748 2 episodes - episode_reward: 107.594 [-100.000, 315.188] - loss: 14.301 - mae: 49.777 - mean_q: 63.082 Interval 1279 (639000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1437 Interval 1280 (639500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3243 1 episodes - episode_reward: 45.124 [45.124, 45.124] - loss: 11.448 - mae: 49.027 - mean_q: 61.190 Interval 1281 (640000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5545 1 episodes - episode_reward: -225.406 [-225.406, -225.406] - loss: 11.497 - mae: 48.357 - mean_q: 60.675 Interval 1282 (640500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5277 2 episodes - episode_reward: 
-147.895 [-195.791, -100.000] - loss: 9.411 - mae: 48.247 - mean_q: 60.528 Interval 1283 (641000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0127 Interval 1284 (641500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2785 Interval 1285 (642000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.4635 1 episodes - episode_reward: 93.501 [93.501, 93.501] - loss: 10.945 - mae: 47.198 - mean_q: 59.499 Interval 1286 (642500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0597 Interval 1287 (643000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1282 2 episodes - episode_reward: -1.673 [-154.155, 150.808] - loss: 12.162 - mae: 47.008 - mean_q: 58.278 Interval 1288 (643500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3248 1 episodes - episode_reward: 122.229 [122.229, 122.229] - loss: 14.499 - mae: 47.034 - mean_q: 58.257 Interval 1289 (644000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0657 Interval 1290 (644500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2641 1 episodes - episode_reward: 155.773 [155.773, 155.773] - loss: 10.907 - mae: 46.762 - mean_q: 57.715 Interval 1291 (645000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1995 1 episodes - episode_reward: -127.063 [-127.063, -127.063] - loss: 15.063 - mae: 46.399 - mean_q: 57.530 Interval 1292 (645500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3071 2 episodes - episode_reward: -169.013 [-620.116, 282.089] - loss: 11.158 - mae: 46.063 - mean_q: 56.577 Interval 1293 (646000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1291 Interval 1294 (646500 steps performed) 500/500 [==============================] - 4s 
8ms/step - reward: -0.0203 2 episodes - episode_reward: 46.790 [-109.192, 202.772] - loss: 10.230 - mae: 45.319 - mean_q: 56.373 Interval 1295 (647000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7576 3 episodes - episode_reward: -130.443 [-195.829, -61.320] - loss: 13.538 - mae: 45.241 - mean_q: 56.467 Interval 1296 (647500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4799 1 episodes - episode_reward: 231.415 [231.415, 231.415] - loss: 10.056 - mae: 45.489 - mean_q: 56.646 Interval 1297 (648000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1020 Interval 1298 (648500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0159 Interval 1299 (649000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7394 2 episodes - episode_reward: -183.689 [-576.470, 209.093] - loss: 10.958 - mae: 45.570 - mean_q: 56.125 Interval 1300 (649500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5146 1 episodes - episode_reward: -215.475 [-215.475, -215.475] - loss: 18.613 - mae: 45.691 - mean_q: 55.908 Interval 1301 (650000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0977 Interval 1302 (650500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.5991 1 episodes - episode_reward: 191.107 [191.107, 191.107] - loss: 9.124 - mae: 45.362 - mean_q: 56.321 Interval 1303 (651000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4094 1 episodes - episode_reward: -149.331 [-149.331, -149.331] - loss: 13.685 - mae: 45.080 - mean_q: 55.488 Interval 1304 (651500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2585 1 episodes - episode_reward: 173.684 [173.684, 173.684] - loss: 11.561 - mae: 45.063 - mean_q: 55.888 Interval 1305 (652000 steps 
performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3635 1 episodes - episode_reward: 193.902 [193.902, 193.902] - loss: 11.412 - mae: 44.809 - mean_q: 56.070 Interval 1306 (652500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0654 Interval 1307 (653000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4414 3 episodes - episode_reward: -83.424 [-177.116, 44.653] - loss: 8.832 - mae: 44.429 - mean_q: 55.583 Interval 1308 (653500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0817 Interval 1309 (654000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0622 Interval 1310 (654500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0157 2 episodes - episode_reward: -18.200 [-115.016, 78.617] - loss: 9.681 - mae: 43.756 - mean_q: 54.545 Interval 1311 (655000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3066 1 episodes - episode_reward: 184.701 [184.701, 184.701] - loss: 10.104 - mae: 43.843 - mean_q: 55.133 Interval 1312 (655500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0440 Interval 1313 (656000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.1858 1 episodes - episode_reward: 73.860 [73.860, 73.860] - loss: 9.800 - mae: 43.788 - mean_q: 55.054 Interval 1314 (656500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6646 2 episodes - episode_reward: -182.133 [-256.840, -107.426] - loss: 12.080 - mae: 43.747 - mean_q: 55.067 Interval 1315 (657000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6006 2 episodes - episode_reward: -133.382 [-166.765, -100.000] - loss: 11.270 - mae: 43.876 - mean_q: 54.646 Interval 1316 (657500 steps performed) 500/500 [==============================] - 4s 
8ms/step - reward: 0.1048 Interval 1317 (658000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0046 2 episodes - episode_reward: -1.045 [-144.239, 142.149] - loss: 9.043 - mae: 43.124 - mean_q: 53.861 Interval 1318 (658500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1857 2 episodes - episode_reward: 73.359 [-120.437, 267.155] - loss: 10.272 - mae: 42.717 - mean_q: 53.822 Interval 1319 (659000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0983 1 episodes - episode_reward: -68.478 [-68.478, -68.478] - loss: 11.038 - mae: 42.782 - mean_q: 54.491 Interval 1320 (659500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4273 1 episodes - episode_reward: -140.545 [-140.545, -140.545] - loss: 12.526 - mae: 42.431 - mean_q: 53.859 Interval 1321 (660000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0969 Interval 1322 (660500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.3629 1 episodes - episode_reward: 64.973 [64.973, 64.973] - loss: 9.714 - mae: 42.248 - mean_q: 54.010 Interval 1323 (661000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4340 2 episodes - episode_reward: -123.296 [-215.806, -30.785] - loss: 21.120 - mae: 42.177 - mean_q: 53.688 Interval 1324 (661500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6050 1 episodes - episode_reward: -263.125 [-263.125, -263.125] - loss: 10.282 - mae: 41.848 - mean_q: 53.716 Interval 1325 (662000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0197 Interval 1326 (662500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8023 3 episodes - episode_reward: -140.365 [-361.045, 95.413] - loss: 11.080 - mae: 41.265 - mean_q: 52.971 Interval 1327 (663000 steps 
performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8567 2 episodes - episode_reward: -224.574 [-228.732, -220.416] - loss: 15.726 - mae: 41.179 - mean_q: 53.042 Interval 1328 (663500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1607 Interval 1329 (664000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.2215 3 episodes - episode_reward: -562.076 [-810.082, -200.284] - loss: 23.074 - mae: 41.091 - mean_q: 52.987 Interval 1330 (664500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2595 Interval 1331 (665000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5093 1 episodes - episode_reward: -406.590 [-406.590, -406.590] - loss: 9.896 - mae: 41.633 - mean_q: 53.548 Interval 1332 (665500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2041 Interval 1333 (666000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.7857 1 episodes - episode_reward: -470.885 [-470.885, -470.885] - loss: 13.404 - mae: 41.582 - mean_q: 53.366 Interval 1334 (666500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7140 Interval 1335 (667000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7035 1 episodes - episode_reward: -640.914 [-640.914, -640.914] - loss: 8.880 - mae: 41.557 - mean_q: 53.357 Interval 1336 (667500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2360 Interval 1337 (668000 steps performed) 500/500 [==============================] - 348s 698ms/step - reward: -0.9027 1 episodes - episode_reward: -580.637 [-580.637, -580.637] - loss: 13.215 - mae: 41.849 - mean_q: 54.032 Interval 1338 (668500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0974 Interval 1339 (669000 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: 0.0424 Interval 1340 (669500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2005 1 episodes - episode_reward: 51.621 [51.621, 51.621] - loss: 11.849 - mae: 42.842 - mean_q: 54.573 Interval 1341 (670000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1849 1 episodes - episode_reward: -162.912 [-162.912, -162.912] - loss: 10.870 - mae: 43.083 - mean_q: 55.317 Interval 1342 (670500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8488 3 episodes - episode_reward: -288.748 [-613.194, -87.263] - loss: 14.481 - mae: 42.731 - mean_q: 54.910 Interval 1343 (671000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0308 Interval 1344 (671500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.2145 1 episodes - episode_reward: 135.776 [135.776, 135.776] - loss: 9.416 - mae: 43.092 - mean_q: 54.554 Interval 1345 (672000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1598 1 episodes - episode_reward: -67.798 [-67.798, -67.798] - loss: 11.123 - mae: 43.439 - mean_q: 55.038 Interval 1346 (672500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1853 1 episodes - episode_reward: 232.744 [232.744, 232.744] - loss: 9.896 - mae: 43.682 - mean_q: 55.230 Interval 1347 (673000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.4249 1 episodes - episode_reward: 149.039 [149.039, 149.039] - loss: 10.794 - mae: 43.314 - mean_q: 54.815 Interval 1348 (673500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.6379 2 episodes - episode_reward: -48.869 [-136.355, 38.617] - loss: 24.246 - mae: 43.414 - mean_q: 55.021 Interval 1349 (674000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.6998 1 
episodes - episode_reward: -524.764 [-524.764, -524.764] - loss: 12.049 - mae: 43.666 - mean_q: 55.256 Interval 1350 (674500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.3497 1 episodes - episode_reward: 176.799 [176.799, 176.799] - loss: 13.324 - mae: 43.557 - mean_q: 54.773 Interval 1351 (675000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.3444 1 episodes - episode_reward: 164.697 [164.697, 164.697] - loss: 10.496 - mae: 43.527 - mean_q: 54.436 Interval 1352 (675500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.2927 1 episodes - episode_reward: 265.905 [265.905, 265.905] - loss: 15.333 - mae: 43.632 - mean_q: 54.221 Interval 1353 (676000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.4116 2 episodes - episode_reward: -258.106 [-407.101, -109.111] - loss: 11.644 - mae: 44.488 - mean_q: 55.572 Interval 1354 (676500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.1033 1 episodes - episode_reward: 318.635 [318.635, 318.635] - loss: 14.313 - mae: 44.998 - mean_q: 55.801 Interval 1355 (677000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2086 Interval 1356 (677500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -1.3736 1 episodes - episode_reward: -448.043 [-448.043, -448.043] - loss: 14.082 - mae: 45.183 - mean_q: 56.285 Interval 1357 (678000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.7240 4 episodes - episode_reward: -318.470 [-887.929, -100.000] - loss: 12.515 - mae: 45.438 - mean_q: 56.051 Interval 1358 (678500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1672 Interval 1359 (679000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -1.1503 Interval 1360 (679500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -3.2030 4 episodes - episode_reward: -577.677 [-824.225, -140.164] - loss: 14.872 - mae: 46.793 - mean_q: 57.200 Interval 1361 (680000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.7112 1 episodes - episode_reward: 281.324 [281.324, 281.324] - loss: 26.171 - mae: 47.301 - mean_q: 57.403 Interval 1362 (680500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2380 1 episodes - episode_reward: 28.354 [28.354, 28.354] - loss: 16.802 - mae: 47.940 - mean_q: 58.340 Interval 1363 (681000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.9641 Interval 1364 (681500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3568 2 episodes - episode_reward: -494.340 [-981.220, -7.460] - loss: 13.900 - mae: 48.886 - mean_q: 58.284 Interval 1365 (682000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3009 Interval 1366 (682500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0017 3 episodes - episode_reward: -274.796 [-454.223, -151.835] - loss: 14.683 - mae: 50.493 - mean_q: 60.397 Interval 1367 (683000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0458 Interval 1368 (683500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0458 Interval 1369 (684000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.7088 2 episodes - episode_reward: -241.829 [-326.879, -156.778] - loss: 12.827 - mae: 52.029 - mean_q: 62.433 Interval 1370 (684500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0115 1 episodes - episode_reward: -510.136 [-510.136, -510.136] - loss: 19.515 - mae: 52.738 - mean_q: 62.715 Interval 1371 (685000 steps performed) 500/500 [==============================] - 4s 
8ms/step - reward: -0.0109 Interval 1372 (685500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1985 Interval 1373 (686000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0505 3 episodes - episode_reward: -5.510 [-88.134, 112.487] - loss: 13.499 - mae: 53.709 - mean_q: 64.228 Interval 1374 (686500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2556 1 episodes - episode_reward: -109.482 [-109.482, -109.482] - loss: 19.588 - mae: 53.659 - mean_q: 63.945 Interval 1375 (687000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2251 Interval 1376 (687500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2022 Interval 1377 (688000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1070 1 episodes - episode_reward: -96.484 [-96.484, -96.484] - loss: 20.245 - mae: 54.249 - mean_q: 64.951 Interval 1378 (688500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0354 Interval 1379 (689000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.2276 1 episodes - episode_reward: 179.756 [179.756, 179.756] - loss: 22.282 - mae: 53.745 - mean_q: 64.533 Interval 1380 (689500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6521 1 episodes - episode_reward: -214.265 [-214.265, -214.265] - loss: 17.157 - mae: 54.408 - mean_q: 65.984 Interval 1381 (690000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0517 Interval 1382 (690500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3107 2 episodes - episode_reward: -206.333 [-230.260, -182.406] - loss: 27.594 - mae: 55.153 - mean_q: 67.454 Interval 1383 (691000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0660 Interval 
1384 (691500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0421 Interval 1385 (692000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0354 Interval 1386 (692500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0420 Interval 1387 (693000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0283 Interval 1388 (693500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0142 Interval 1389 (694000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6362 3 episodes - episode_reward: -47.377 [-258.749, 216.617] - loss: 19.328 - mae: 53.382 - mean_q: 65.972 Interval 1390 (694500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0320 Interval 1391 (695000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.0569 Interval 1392 (695500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0359 1 episodes - episode_reward: 47.127 [47.127, 47.127] - loss: 29.220 - mae: 53.920 - mean_q: 66.811 Interval 1393 (696000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1847 1 episodes - episode_reward: -173.015 [-173.015, -173.015] - loss: 15.302 - mae: 54.260 - mean_q: 68.158 Interval 1394 (696500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.0695 Interval 1395 (697000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0249 Interval 1396 (697500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.0657 Interval 1397 (698000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: 0.0446 1 episodes - episode_reward: 78.113 [78.113, 78.113] - loss: 20.329 - mae: 54.045 - mean_q: 67.043 Interval 1398 (698500 steps 
performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2088 1 episodes - episode_reward: -111.804 [-111.804, -111.804] - loss: 17.976 - mae: 54.124 - mean_q: 67.496 Interval 1399 (699000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5079 1 episodes - episode_reward: -292.535 [-292.535, -292.535] - loss: 14.699 - mae: 55.045 - mean_q: 68.732 Interval 1400 (699500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1382 1 episodes - episode_reward: -256.836 [-256.836, -256.836] - loss: 18.597 - mae: 54.982 - mean_q: 68.537 Interval 1401 (700000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3959 1 episodes - episode_reward: 250.976 [250.976, 250.976] - loss: 17.931 - mae: 54.442 - mean_q: 67.915 Interval 1402 (700500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2034 1 episodes - episode_reward: 222.350 [222.350, 222.350] - loss: 20.542 - mae: 54.398 - mean_q: 68.158 Interval 1403 (701000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1883 2 episodes - episode_reward: -64.094 [-64.857, -63.332] - loss: 21.468 - mae: 54.224 - mean_q: 68.458 Interval 1404 (701500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0506 1 episodes - episode_reward: -58.911 [-58.911, -58.911] - loss: 14.856 - mae: 53.802 - mean_q: 67.860 Interval 1405 (702000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3019 1 episodes - episode_reward: 16.378 [16.378, 16.378] - loss: 28.652 - mae: 54.115 - mean_q: 68.014 Interval 1406 (702500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2230 Interval 1407 (703000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.6911 2 episodes - episode_reward: -254.246 [-384.155, -124.337] - loss: 16.871 - mae: 54.896 
- mean_q: 70.496 Interval 1408 (703500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2858 2 episodes - episode_reward: -50.182 [-51.857, -48.508] - loss: 30.846 - mae: 54.862 - mean_q: 70.646 Interval 1409 (704000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9719 4 episodes - episode_reward: -120.501 [-158.018, -100.000] - loss: 17.112 - mae: 55.942 - mean_q: 72.385 Interval 1410 (704500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1244 Interval 1411 (705000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.1253 Interval 1412 (705500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2517 1 episodes - episode_reward: -632.582 [-632.582, -632.582] - loss: 23.276 - mae: 55.556 - mean_q: 71.272 Interval 1413 (706000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1691 Interval 1414 (706500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3360 Interval 1415 (707000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2803 3 episodes - episode_reward: -360.602 [-799.535, -100.000] - loss: 30.062 - mae: 56.055 - mean_q: 71.911 Interval 1416 (707500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1391 Interval 1417 (708000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2263 Interval 1418 (708500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1285 Interval 1419 (709000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8249 2 episodes - episode_reward: -301.110 [-475.325, -126.894] - loss: 14.464 - mae: 54.564 - mean_q: 69.095 Interval 1420 (709500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2684 
Interval 1421 (710000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3088 Interval 1422 (710500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.0243 Interval 1423 (711000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3472 2 episodes - episode_reward: -244.265 [-337.489, -151.041] - loss: 20.191 - mae: 52.772 - mean_q: 67.243 Interval 1424 (711500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0298 Interval 1425 (712000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0375 Interval 1426 (712500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2320 1 episodes - episode_reward: 96.852 [96.852, 96.852] - loss: 12.474 - mae: 52.664 - mean_q: 66.803 Interval 1427 (713000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0240 Interval 1428 (713500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8948 2 episodes - episode_reward: -222.335 [-560.942, 116.273] - loss: 20.755 - mae: 52.593 - mean_q: 66.712 Interval 1429 (714000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8641 Interval 1430 (714500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2044 1 episodes - episode_reward: -466.558 [-466.558, -466.558] - loss: 19.068 - mae: 52.248 - mean_q: 66.299 Interval 1431 (715000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.7474 Interval 1432 (715500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5862 2 episodes - episode_reward: -326.653 [-915.178, 261.872] - loss: 14.759 - mae: 52.651 - mean_q: 67.589 Interval 1433 (716000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2704 Interval 1434 (716500 steps 
performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0754 3 episodes - episode_reward: -287.799 [-632.718, -31.447] - loss: 19.720 - mae: 52.583 - mean_q: 67.677 Interval 1435 (717000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2946 1 episodes - episode_reward: -458.791 [-458.791, -458.791] - loss: 18.197 - mae: 52.802 - mean_q: 68.153 Interval 1436 (717500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0821 Interval 1437 (718000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6448 3 episodes - episode_reward: -26.800 [-125.922, 154.378] - loss: 12.064 - mae: 51.823 - mean_q: 66.943 Interval 1438 (718500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1650 1 episodes - episode_reward: -219.451 [-219.451, -219.451] - loss: 22.069 - mae: 51.196 - mean_q: 65.905 Interval 1439 (719000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0053 Interval 1440 (719500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1086 1 episodes - episode_reward: -174.869 [-174.869, -174.869] - loss: 19.859 - mae: 50.527 - mean_q: 64.707 Interval 1441 (720000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1387 2 episodes - episode_reward: 25.704 [-223.415, 274.823] - loss: 16.083 - mae: 49.968 - mean_q: 64.429 Interval 1442 (720500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4007 1 episodes - episode_reward: 211.450 [211.450, 211.450] - loss: 13.241 - mae: 49.182 - mean_q: 63.356 Interval 1443 (721000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2595 Interval 1444 (721500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3075 2 episodes - episode_reward: -145.398 [-169.130, -121.665] - loss: 14.608 
- mae: 48.336 - mean_q: 62.002 Interval 1445 (722000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0327 1 episodes - episode_reward: -65.947 [-65.947, -65.947] - loss: 19.595 - mae: 47.210 - mean_q: 60.175 Interval 1446 (722500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5058 1 episodes - episode_reward: -168.192 [-168.192, -168.192] - loss: 15.064 - mae: 46.803 - mean_q: 60.357 Interval 1447 (723000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2645 1 episodes - episode_reward: -163.830 [-163.830, -163.830] - loss: 15.575 - mae: 46.041 - mean_q: 58.924 Interval 1448 (723500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1504 Interval 1449 (724000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2282 3 episodes - episode_reward: 30.481 [-100.000, 124.811] - loss: 12.559 - mae: 44.741 - mean_q: 57.327 Interval 1450 (724500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0850 Interval 1451 (725000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1918 Interval 1452 (725500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3381 1 episodes - episode_reward: -9.278 [-9.278, -9.278] - loss: 12.752 - mae: 43.952 - mean_q: 55.414 Interval 1453 (726000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1256 1 episodes - episode_reward: 214.232 [214.232, 214.232] - loss: 13.590 - mae: 42.770 - mean_q: 53.771 Interval 1454 (726500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2143 Interval 1455 (727000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.7739 1 episodes - episode_reward: -384.689 [-384.689, -384.689] - loss: 12.717 - mae: 42.714 - mean_q: 53.333 Interval 1456 
(727500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -6.2873e-04 1 episodes - episode_reward: 18.215 [18.215, 18.215] - loss: 13.551 - mae: 42.092 - mean_q: 52.164 Interval 1457 (728000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0565 3 episodes - episode_reward: -25.478 [-121.176, 150.202] - loss: 9.656 - mae: 41.612 - mean_q: 51.781 Interval 1458 (728500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0670 2 episodes - episode_reward: 15.217 [-13.823, 44.256] - loss: 20.060 - mae: 41.987 - mean_q: 52.445 Interval 1459 (729000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0584 Interval 1460 (729500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0442 Interval 1461 (730000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4960 2 episodes - episode_reward: -107.407 [-114.815, -100.000] - loss: 15.465 - mae: 40.799 - mean_q: 51.449 Interval 1462 (730500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1970 Interval 1463 (731000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0952 Interval 1464 (731500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1811 1 episodes - episode_reward: 201.234 [201.234, 201.234] - loss: 13.402 - mae: 41.135 - mean_q: 52.466 Interval 1465 (732000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1787 Interval 1466 (732500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3360 2 episodes - episode_reward: -171.791 [-267.420, -76.162] - loss: 12.210 - mae: 40.488 - mean_q: 52.363 Interval 1467 (733000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2177 1 episodes - episode_reward: 174.876 [174.876, 174.876] - 
loss: 12.587 - mae: 40.660 - mean_q: 52.709 Interval 1468 (733500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2618 2 episodes - episode_reward: -63.159 [-84.054, -42.264] - loss: 16.512 - mae: 41.773 - mean_q: 54.492 Interval 1469 (734000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1296 Interval 1470 (734500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1257 Interval 1471 (735000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1558 2 episodes - episode_reward: -156.217 [-244.609, -67.825] - loss: 16.892 - mae: 41.709 - mean_q: 53.984 Interval 1472 (735500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4093 1 episodes - episode_reward: 201.202 [201.202, 201.202] - loss: 18.693 - mae: 42.198 - mean_q: 54.500 Interval 1473 (736000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1129 Interval 1474 (736500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2240 1 episodes - episode_reward: 223.160 [223.160, 223.160] - loss: 13.256 - mae: 42.014 - mean_q: 54.401 Interval 1475 (737000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3140 1 episodes - episode_reward: -139.949 [-139.949, -139.949] - loss: 9.911 - mae: 41.305 - mean_q: 53.599 Interval 1476 (737500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0408 Interval 1477 (738000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5631 3 episodes - episode_reward: -103.225 [-207.268, 10.157] - loss: 14.765 - mae: 40.775 - mean_q: 52.631 Interval 1478 (738500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1310 Interval 1479 (739000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 
-0.1214 1 episodes - episode_reward: -178.533 [-178.533, -178.533] - loss: 18.429 - mae: 42.437 - mean_q: 55.161 Interval 1480 (739500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2021 1 episodes - episode_reward: -15.776 [-15.776, -15.776] - loss: 11.929 - mae: 41.480 - mean_q: 54.058 Interval 1481 (740000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0061 Interval 1482 (740500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0341 Interval 1483 (741000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0532 Interval 1484 (741500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0922 3 episodes - episode_reward: -37.793 [-72.481, 21.487] - loss: 13.223 - mae: 42.548 - mean_q: 55.219 Interval 1485 (742000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1802 3 episodes - episode_reward: -15.213 [-116.579, 177.499] - loss: 13.770 - mae: 43.304 - mean_q: 56.245 Interval 1486 (742500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3118 1 episodes - episode_reward: -105.492 [-105.492, -105.492] - loss: 15.152 - mae: 43.835 - mean_q: 57.203 Interval 1487 (743000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0707 Interval 1488 (743500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2990 Interval 1489 (744000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1175 4 episodes - episode_reward: -187.268 [-399.634, -107.915] - loss: 12.661 - mae: 43.475 - mean_q: 57.224 Interval 1490 (744500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1100 2 episodes - episode_reward: -51.885 [-92.470, -11.300] - loss: 19.040 - mae: 43.680 - mean_q: 57.024 Interval 1491 (745000 steps 
performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0633 Interval 1492 (745500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2181 Interval 1493 (746000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1647 Interval 1494 (746500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3746 1 episodes - episode_reward: -364.614 [-364.614, -364.614] - loss: 14.635 - mae: 43.734 - mean_q: 56.618 Interval 1495 (747000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0950 Interval 1496 (747500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6141 4 episodes - episode_reward: -64.124 [-178.685, 147.036] - loss: 15.136 - mae: 44.548 - mean_q: 57.860 Interval 1497 (748000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1459 3 episodes - episode_reward: -199.745 [-343.414, -16.273] - loss: 16.409 - mae: 45.893 - mean_q: 59.540 Interval 1498 (748500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0941 1 episodes - episode_reward: 0.520 [0.520, 0.520] - loss: 14.942 - mae: 45.250 - mean_q: 58.473 Interval 1499 (749000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3974 3 episodes - episode_reward: 7.906 [-125.359, 250.469] - loss: 16.329 - mae: 45.210 - mean_q: 58.377 Interval 1500 (749500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2881 1 episodes - episode_reward: -393.495 [-393.495, -393.495] - loss: 19.806 - mae: 44.949 - mean_q: 58.337 Interval 1501 (750000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1905 2 episodes - episode_reward: 7.353 [-229.061, 243.766] - loss: 13.376 - mae: 45.781 - mean_q: 59.078 Interval 1502 (750500 steps performed) 500/500 [==============================] - 
3s 7ms/step - reward: -1.0333 2 episodes - episode_reward: -220.044 [-283.772, -156.317] - loss: 12.509 - mae: 45.554 - mean_q: 58.319 Interval 1503 (751000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0948 1 episodes - episode_reward: -145.915 [-145.915, -145.915] - loss: 16.408 - mae: 46.219 - mean_q: 58.829 Interval 1504 (751500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5333 1 episodes - episode_reward: -96.228 [-96.228, -96.228] - loss: 15.841 - mae: 46.029 - mean_q: 58.845 Interval 1505 (752000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6328 3 episodes - episode_reward: -133.252 [-233.642, -66.114] - loss: 14.218 - mae: 46.448 - mean_q: 59.411 Interval 1506 (752500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2151 5 episodes - episode_reward: -105.429 [-131.548, -77.246] - loss: 14.664 - mae: 46.941 - mean_q: 59.617 Interval 1507 (753000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2765 1 episodes - episode_reward: -70.154 [-70.154, -70.154] - loss: 13.903 - mae: 46.701 - mean_q: 59.915 Interval 1508 (753500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0304 1 episodes - episode_reward: -194.820 [-194.820, -194.820] - loss: 16.285 - mae: 47.217 - mean_q: 59.955 Interval 1509 (754000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1139 1 episodes - episode_reward: 194.780 [194.780, 194.780] - loss: 16.603 - mae: 46.451 - mean_q: 59.257 Interval 1510 (754500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2988 3 episodes - episode_reward: -245.707 [-289.370, -207.681] - loss: 17.090 - mae: 46.537 - mean_q: 58.920 Interval 1511 (755000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7369 3 episodes - episode_reward: 
-126.770 [-244.164, -38.904] - loss: 17.683 - mae: 46.842 - mean_q: 59.366 Interval 1512 (755500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0016 1 episodes - episode_reward: -33.581 [-33.581, -33.581] - loss: 14.210 - mae: 45.915 - mean_q: 58.355 Interval 1513 (756000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0098 Interval 1514 (756500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4111 2 episodes - episode_reward: -69.806 [-274.243, 134.631] - loss: 11.715 - mae: 45.531 - mean_q: 57.781 Interval 1515 (757000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2277 1 episodes - episode_reward: -77.454 [-77.454, -77.454] - loss: 13.460 - mae: 45.609 - mean_q: 58.058 Interval 1516 (757500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2476 3 episodes - episode_reward: -235.617 [-329.538, -172.752] - loss: 16.529 - mae: 45.511 - mean_q: 58.144 Interval 1517 (758000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1205 1 episodes - episode_reward: 28.776 [28.776, 28.776] - loss: 14.637 - mae: 46.085 - mean_q: 58.572 Interval 1518 (758500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2277 1 episodes - episode_reward: 153.475 [153.475, 153.475] - loss: 19.539 - mae: 45.758 - mean_q: 57.867 Interval 1519 (759000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7702 3 episodes - episode_reward: -137.949 [-187.771, -84.739] - loss: 13.444 - mae: 45.274 - mean_q: 57.881 Interval 1520 (759500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1679 2 episodes - episode_reward: -71.352 [-92.422, -50.282] - loss: 14.352 - mae: 45.021 - mean_q: 57.647 Interval 1521 (760000 steps performed) 500/500 [==============================] - 3s 7ms/step - 
reward: 0.0735 1 episodes - episode_reward: 7.100 [7.100, 7.100] - loss: 15.652 - mae: 45.584 - mean_q: 58.366 Interval 1522 (760500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1474 Interval 1523 (761000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3233 1 episodes - episode_reward: 182.167 [182.167, 182.167] - loss: 14.424 - mae: 45.191 - mean_q: 57.937 Interval 1524 (761500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0626 Interval 1525 (762000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0530 Interval 1526 (762500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0437 Interval 1527 (763000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0646 Interval 1528 (763500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.0973 Interval 1529 (764000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1727 Interval 1530 (764500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.5439 2 episodes - episode_reward: -247.182 [-420.982, -73.382] - loss: 15.946 - mae: 44.156 - mean_q: 56.640 Interval 1531 (765000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0747 2 episodes - episode_reward: -237.177 [-296.527, -177.828] - loss: 13.007 - mae: 43.009 - mean_q: 55.172 Interval 1532 (765500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3916 3 episodes - episode_reward: -227.923 [-320.393, -177.654] - loss: 14.619 - mae: 42.595 - mean_q: 54.313 Interval 1533 (766000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0148 Interval 1534 (766500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8201 5 episodes - 
episode_reward: -187.257 [-358.679, -109.951] - loss: 15.837 - mae: 41.589 - mean_q: 52.377 Interval 1535 (767000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1315 5 episodes - episode_reward: -117.610 [-168.219, -61.121] - loss: 15.259 - mae: 40.894 - mean_q: 50.988 Interval 1536 (767500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6185 2 episodes - episode_reward: -161.818 [-173.000, -150.636] - loss: 12.925 - mae: 41.083 - mean_q: 50.671 Interval 1537 (768000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0306 2 episodes - episode_reward: -42.414 [-71.009, -13.819] - loss: 12.980 - mae: 41.641 - mean_q: 51.490 Interval 1538 (768500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4475 2 episodes - episode_reward: -74.439 [-137.283, -11.594] - loss: 17.669 - mae: 41.641 - mean_q: 51.114 Interval 1539 (769000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1626 1 episodes - episode_reward: 0.217 [0.217, 0.217] - loss: 14.914 - mae: 41.507 - mean_q: 51.255 Interval 1540 (769500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0488 Interval 1541 (770000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0423 Interval 1542 (770500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1411 Interval 1543 (771000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0844 Interval 1544 (771500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0692 Interval 1545 (772000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1091 Interval 1546 (772500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.0190 Interval 1547 (773000 steps performed) 500/500 
[==============================] - 6s 12ms/step - reward: -0.0487 Interval 1548 (773500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.0256 Interval 1549 (774000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.0567 Interval 1550 (774500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.2798 2 episodes - episode_reward: -167.889 [-221.737, -114.040] - loss: 13.807 - mae: 39.093 - mean_q: 48.544 Interval 1551 (775000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8864 2 episodes - episode_reward: -224.503 [-247.265, -201.740] - loss: 16.043 - mae: 38.420 - mean_q: 47.171 Interval 1552 (775500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2275 2 episodes - episode_reward: -99.311 [-122.347, -76.275] - loss: 13.590 - mae: 37.656 - mean_q: 46.695 Interval 1553 (776000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4124 1 episodes - episode_reward: 203.140 [203.140, 203.140] - loss: 13.459 - mae: 37.310 - mean_q: 46.222 Interval 1554 (776500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0060 Interval 1555 (777000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: 0.0146 Interval 1556 (777500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0450 Interval 1557 (778000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0688 Interval 1558 (778500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0349 Interval 1559 (779000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0351 Interval 1560 (779500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0116 Interval 1561 (780000 steps performed) 500/500 
[==============================] - 5s 9ms/step - reward: 0.1453 1 episodes - episode_reward: 82.487 [82.487, 82.487] - loss: 14.238 - mae: 36.682 - mean_q: 46.252 Interval 1562 (780500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1863 2 episodes - episode_reward: -52.952 [-136.253, 30.349] - loss: 13.830 - mae: 36.868 - mean_q: 46.353 Interval 1563 (781000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1093 Interval 1564 (781500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.8341 1 episodes - episode_reward: -458.960 [-458.960, -458.960] - loss: 14.553 - mae: 37.641 - mean_q: 47.077 Interval 1565 (782000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3406 1 episodes - episode_reward: -153.487 [-153.487, -153.487] - loss: 13.573 - mae: 38.195 - mean_q: 47.910 Interval 1566 (782500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3736 1 episodes - episode_reward: -209.465 [-209.465, -209.465] - loss: 12.803 - mae: 38.463 - mean_q: 47.948 Interval 1567 (783000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4223 1 episodes - episode_reward: -195.902 [-195.902, -195.902] - loss: 20.650 - mae: 38.387 - mean_q: 47.779 Interval 1568 (783500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0955 1 episodes - episode_reward: -545.711 [-545.711, -545.711] - loss: 14.805 - mae: 38.642 - mean_q: 48.431 Interval 1569 (784000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0347 1 episodes - episode_reward: -326.787 [-326.787, -326.787] - loss: 17.432 - mae: 38.598 - mean_q: 48.042 Interval 1570 (784500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9322 3 episodes - episode_reward: -249.340 [-372.496, -174.289] - loss: 17.801 - mae: 38.806 - 
mean_q: 48.267 Interval 1571 (785000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2779 Interval 1572 (785500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3012 2 episodes - episode_reward: -105.946 [-167.004, -44.888] - loss: 18.010 - mae: 38.657 - mean_q: 48.115 Interval 1573 (786000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2032 Interval 1574 (786500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0817 Interval 1575 (787000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1487 1 episodes - episode_reward: -54.040 [-54.040, -54.040] - loss: 17.556 - mae: 37.973 - mean_q: 47.701 Interval 1576 (787500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8343 2 episodes - episode_reward: -200.689 [-223.160, -178.218] - loss: 15.598 - mae: 37.714 - mean_q: 47.469 Interval 1577 (788000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1934 Interval 1578 (788500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1200 Interval 1579 (789000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1914 Interval 1580 (789500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.9374 2 episodes - episode_reward: -364.081 [-535.661, -192.501] - loss: 14.253 - mae: 36.949 - mean_q: 46.548 Interval 1581 (790000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1586 Interval 1582 (790500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1916 Interval 1583 (791000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9893 3 episodes - episode_reward: -234.347 [-383.214, -141.181] - loss: 13.429 - mae: 36.714 - mean_q: 45.602 Interval 
1584 (791500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3710 1 episodes - episode_reward: -245.110 [-245.110, -245.110] - loss: 17.608 - mae: 36.882 - mean_q: 45.789 Interval 1585 (792000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7796 2 episodes - episode_reward: -210.454 [-219.637, -201.271] - loss: 13.990 - mae: 36.693 - mean_q: 45.682 Interval 1586 (792500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1532 2 episodes - episode_reward: -2.748 [-244.254, 238.757] - loss: 15.326 - mae: 37.017 - mean_q: 45.659 Interval 1587 (793000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2266 Interval 1588 (793500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3955 3 episodes - episode_reward: -36.833 [-270.796, 238.894] - loss: 16.265 - mae: 37.800 - mean_q: 46.850 Interval 1589 (794000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1440 Interval 1590 (794500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8102 2 episodes - episode_reward: -229.392 [-263.024, -195.760] - loss: 16.761 - mae: 37.943 - mean_q: 46.843 Interval 1591 (795000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1948 1 episodes - episode_reward: 101.840 [101.840, 101.840] - loss: 15.466 - mae: 37.881 - mean_q: 46.113 Interval 1592 (795500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6415 1 episodes - episode_reward: -309.544 [-309.544, -309.544] - loss: 15.044 - mae: 38.444 - mean_q: 45.774 Interval 1593 (796000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2002 1 episodes - episode_reward: -204.464 [-204.464, -204.464] - loss: 17.149 - mae: 38.465 - mean_q: 46.792 Interval 1594 (796500 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -0.0376 Interval 1595 (797000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5015 3 episodes - episode_reward: -43.734 [-189.248, 200.316] - loss: 14.727 - mae: 38.346 - mean_q: 46.357 Interval 1596 (797500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1488 Interval 1597 (798000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0391 Interval 1598 (798500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.2642 1 episodes - episode_reward: 171.360 [171.360, 171.360] - loss: 15.514 - mae: 38.581 - mean_q: 46.456 Interval 1599 (799000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0673 Interval 1600 (799500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1203 1 episodes - episode_reward: 96.427 [96.427, 96.427] - loss: 14.356 - mae: 37.893 - mean_q: 46.216 Interval 1601 (800000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4874 1 episodes - episode_reward: -190.176 [-190.176, -190.176] - loss: 14.619 - mae: 37.805 - mean_q: 46.101 Interval 1602 (800500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9605 2 episodes - episode_reward: -263.727 [-273.297, -254.156] - loss: 13.226 - mae: 37.779 - mean_q: 45.697 Interval 1603 (801000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8808 2 episodes - episode_reward: -255.433 [-268.316, -242.551] - loss: 19.513 - mae: 37.709 - mean_q: 45.338 Interval 1604 (801500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1235 Interval 1605 (802000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2457 1 episodes - episode_reward: 205.305 [205.305, 205.305] - loss: 18.274 - mae: 37.439 - 
mean_q: 45.051 Interval 1606 (802500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2415 Interval 1607 (803000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6706 2 episodes - episode_reward: -215.597 [-255.708, -175.486] - loss: 13.058 - mae: 37.167 - mean_q: 43.810 Interval 1608 (803500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3804 1 episodes - episode_reward: -230.280 [-230.280, -230.280] - loss: 11.129 - mae: 37.581 - mean_q: 44.759 Interval 1609 (804000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0813 Interval 1610 (804500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8692 2 episodes - episode_reward: -228.616 [-230.370, -226.863] - loss: 17.697 - mae: 37.753 - mean_q: 44.932 Interval 1611 (805000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2606 1 episodes - episode_reward: -172.607 [-172.607, -172.607] - loss: 15.033 - mae: 37.089 - mean_q: 44.447 Interval 1612 (805500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0779 Interval 1613 (806000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1929 Interval 1614 (806500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.9595 3 episodes - episode_reward: -291.288 [-541.428, -100.000] - loss: 14.874 - mae: 36.673 - mean_q: 44.095 Interval 1615 (807000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7405 2 episodes - episode_reward: -249.194 [-417.622, -80.767] - loss: 13.482 - mae: 36.298 - mean_q: 43.942 Interval 1616 (807500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1269 2 episodes - episode_reward: 9.160 [-204.084, 222.403] - loss: 12.695 - mae: 35.931 - mean_q: 43.718 Interval 1617 (808000 
steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4462 2 episodes - episode_reward: -108.957 [-185.686, -32.228] - loss: 13.918 - mae: 35.516 - mean_q: 43.146 Interval 1618 (808500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0314 2 episodes - episode_reward: 20.276 [-104.570, 145.123] - loss: 18.884 - mae: 35.216 - mean_q: 42.293 Interval 1619 (809000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3903 3 episodes - episode_reward: -216.435 [-324.825, -54.555] - loss: 19.605 - mae: 34.922 - mean_q: 42.441 Interval 1620 (809500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4249 1 episodes - episode_reward: 129.846 [129.846, 129.846] - loss: 11.685 - mae: 34.620 - mean_q: 42.869 Interval 1621 (810000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1485 1 episodes - episode_reward: -112.321 [-112.321, -112.321] - loss: 13.032 - mae: 34.716 - mean_q: 43.184 Interval 1622 (810500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1663 Interval 1623 (811000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2174 Interval 1624 (811500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.9922 1 episodes - episode_reward: -683.357 [-683.357, -683.357] - loss: 12.480 - mae: 35.006 - mean_q: 43.070 Interval 1625 (812000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1974 4 episodes - episode_reward: -269.021 [-471.835, -141.003] - loss: 15.275 - mae: 35.294 - mean_q: 43.310 Interval 1626 (812500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7472 3 episodes - episode_reward: -131.641 [-201.278, -78.548] - loss: 13.218 - mae: 35.757 - mean_q: 43.589 Interval 1627 (813000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -0.1557 Interval 1628 (813500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1187 Interval 1629 (814000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0620 1 episodes - episode_reward: -250.888 [-250.888, -250.888] - loss: 17.137 - mae: 36.637 - mean_q: 43.371 Interval 1630 (814500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0056 Interval 1631 (815000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0431 Interval 1632 (815500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2341 2 episodes - episode_reward: -25.584 [-234.063, 182.895] - loss: 18.329 - mae: 36.979 - mean_q: 44.705 Interval 1633 (816000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5159 1 episodes - episode_reward: -316.365 [-316.365, -316.365] - loss: 13.451 - mae: 36.824 - mean_q: 44.972 Interval 1634 (816500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8296 4 episodes - episode_reward: -91.762 [-299.724, 208.675] - loss: 16.970 - mae: 37.176 - mean_q: 46.012 Interval 1635 (817000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0505 1 episodes - episode_reward: -24.100 [-24.100, -24.100] - loss: 19.274 - mae: 37.594 - mean_q: 46.317 Interval 1636 (817500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1256 Interval 1637 (818000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3243 1 episodes - episode_reward: 187.043 [187.043, 187.043] - loss: 17.240 - mae: 37.400 - mean_q: 46.703 Interval 1638 (818500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3607 2 episodes - episode_reward: -82.277 [-366.033, 201.479] - loss: 13.331 - mae: 36.890 
- mean_q: 45.923 Interval 1639 (819000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0579 Interval 1640 (819500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2972 1 episodes - episode_reward: 196.869 [196.869, 196.869] - loss: 10.893 - mae: 37.031 - mean_q: 46.071 Interval 1641 (820000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0454 Interval 1642 (820500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7510 3 episodes - episode_reward: -109.153 [-124.994, -97.663] - loss: 15.255 - mae: 37.281 - mean_q: 46.621 Interval 1643 (821000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5360 2 episodes - episode_reward: -176.110 [-308.021, -44.199] - loss: 9.362 - mae: 37.278 - mean_q: 46.718 Interval 1644 (821500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0243 Interval 1645 (822000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0914 2 episodes - episode_reward: 66.744 [9.001, 124.487] - loss: 16.002 - mae: 37.653 - mean_q: 47.590 Interval 1646 (822500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6549 2 episodes - episode_reward: -192.948 [-208.853, -177.043] - loss: 14.122 - mae: 37.845 - mean_q: 47.522 Interval 1647 (823000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2046 2 episodes - episode_reward: -89.569 [-91.816, -87.323] - loss: 13.794 - mae: 37.836 - mean_q: 46.811 Interval 1648 (823500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2253 Interval 1649 (824000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3339 1 episodes - episode_reward: 290.536 [290.536, 290.536] - loss: 14.452 - mae: 38.243 - mean_q: 47.573 Interval 1650 (824500 steps performed) 
500/500 [==============================] - 4s 8ms/step - reward: -0.2059 Interval 1651 (825000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0267 Interval 1652 (825500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0479 Interval 1653 (826000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3947 1 episodes - episode_reward: 20.735 [20.735, 20.735] - loss: 17.585 - mae: 39.056 - mean_q: 48.721 Interval 1654 (826500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3242 1 episodes - episode_reward: 23.163 [23.163, 23.163] - loss: 15.185 - mae: 39.584 - mean_q: 48.937 Interval 1655 (827000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3293 1 episodes - episode_reward: -327.166 [-327.166, -327.166] - loss: 18.477 - mae: 39.606 - mean_q: 49.525 Interval 1656 (827500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0852 Interval 1657 (828000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1286 Interval 1658 (828500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5016 3 episodes - episode_reward: -110.663 [-224.572, -1.915] - loss: 16.215 - mae: 40.116 - mean_q: 49.914 Interval 1659 (829000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9670 2 episodes - episode_reward: -214.686 [-383.490, -45.882] - loss: 10.906 - mae: 40.625 - mean_q: 50.615 Interval 1660 (829500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5449 1 episodes - episode_reward: 250.268 [250.268, 250.268] - loss: 17.273 - mae: 40.940 - mean_q: 50.665 Interval 1661 (830000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4611 1 episodes - episode_reward: 225.123 [225.123, 225.123] - loss: 14.078 - mae: 40.982 
- mean_q: 50.064 Interval 1662 (830500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1463 Interval 1663 (831000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6731 3 episodes - episode_reward: -89.690 [-349.751, 196.254] - loss: 16.139 - mae: 40.906 - mean_q: 49.895 Interval 1664 (831500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1807 1 episodes - episode_reward: -99.185 [-99.185, -99.185] - loss: 17.865 - mae: 40.821 - mean_q: 49.610 Interval 1665 (832000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5527 2 episodes - episode_reward: 250.642 [178.270, 323.013] - loss: 16.965 - mae: 40.870 - mean_q: 49.310 Interval 1666 (832500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4980e-04 2 episodes - episode_reward: -40.235 [-97.458, 16.987] - loss: 17.030 - mae: 41.175 - mean_q: 49.659 Interval 1667 (833000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0274 Interval 1668 (833500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3993 1 episodes - episode_reward: 189.757 [189.757, 189.757] - loss: 16.505 - mae: 41.010 - mean_q: 49.736 Interval 1669 (834000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2446 1 episodes - episode_reward: 205.550 [205.550, 205.550] - loss: 14.163 - mae: 41.059 - mean_q: 49.861 Interval 1670 (834500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0714 Interval 1671 (835000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.5287 1 episodes - episode_reward: 144.658 [144.658, 144.658] - loss: 16.860 - mae: 41.769 - mean_q: 51.067 Interval 1672 (835500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0589 1 episodes - episode_reward: 273.834 
[273.834, 273.834] - loss: 17.418 - mae: 42.349 - mean_q: 51.889 Interval 1673 (836000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3493 1 episodes - episode_reward: -310.829 [-310.829, -310.829] - loss: 13.376 - mae: 42.708 - mean_q: 52.021 Interval 1674 (836500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0192 Interval 1675 (837000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4179 3 episodes - episode_reward: -68.395 [-132.465, 27.279] - loss: 16.816 - mae: 43.200 - mean_q: 53.033 Interval 1676 (837500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0939 Interval 1677 (838000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.4384 2 episodes - episode_reward: -62.896 [-280.288, 154.496] - loss: 17.405 - mae: 43.777 - mean_q: 53.569 Interval 1678 (838500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6609 1 episodes - episode_reward: -139.015 [-139.015, -139.015] - loss: 15.566 - mae: 44.554 - mean_q: 54.414 Interval 1679 (839000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1957 3 episodes - episode_reward: -262.934 [-436.503, -15.445] - loss: 21.611 - mae: 45.198 - mean_q: 55.301 Interval 1680 (839500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.5760 1 episodes - episode_reward: 214.293 [214.293, 214.293] - loss: 16.277 - mae: 45.703 - mean_q: 56.128 Interval 1681 (840000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4157 1 episodes - episode_reward: 178.164 [178.164, 178.164] - loss: 21.244 - mae: 45.892 - mean_q: 56.579 Interval 1682 (840500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3008 2 episodes - episode_reward: 93.784 [-56.529, 244.097] - loss: 21.701 - mae: 46.947 - mean_q: 
57.144 Interval 1683 (841000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4180 1 episodes - episode_reward: 225.783 [225.783, 225.783] - loss: 17.010 - mae: 47.137 - mean_q: 58.185 Interval 1684 (841500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5362 2 episodes - episode_reward: 109.708 [-27.816, 247.232] - loss: 20.479 - mae: 46.726 - mean_q: 57.612 Interval 1685 (842000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1683 Interval 1686 (842500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0398 2 episodes - episode_reward: 74.175 [-100.000, 248.350] - loss: 18.627 - mae: 47.522 - mean_q: 58.868 Interval 1687 (843000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2215 1 episodes - episode_reward: -46.966 [-46.966, -46.966] - loss: 19.116 - mae: 47.744 - mean_q: 59.793 Interval 1688 (843500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8262 4 episodes - episode_reward: -115.574 [-144.184, -84.502] - loss: 20.243 - mae: 47.795 - mean_q: 59.416 Interval 1689 (844000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2588 3 episodes - episode_reward: -25.353 [-169.407, 217.105] - loss: 19.993 - mae: 47.957 - mean_q: 59.391 Interval 1690 (844500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2073 1 episodes - episode_reward: -4.411 [-4.411, -4.411] - loss: 19.835 - mae: 47.776 - mean_q: 58.961 Interval 1691 (845000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2344 1 episodes - episode_reward: 205.296 [205.296, 205.296] - loss: 14.516 - mae: 47.998 - mean_q: 59.556 Interval 1692 (845500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6262 2 episodes - episode_reward: -160.381 [-176.396, -144.365] - 
loss: 20.428 - mae: 48.532 - mean_q: 59.653 Interval 1693 (846000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2029 1 episodes - episode_reward: -769.797 [-769.797, -769.797] - loss: 19.787 - mae: 47.859 - mean_q: 59.225 Interval 1694 (846500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5537 2 episodes - episode_reward: -64.984 [-158.710, 28.743] - loss: 18.765 - mae: 47.317 - mean_q: 58.716 Interval 1695 (847000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4824 2 episodes - episode_reward: -132.218 [-199.927, -64.509] - loss: 17.949 - mae: 47.595 - mean_q: 59.681 Interval 1696 (847500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5345 2 episodes - episode_reward: -72.654 [-103.986, -41.322] - loss: 17.901 - mae: 48.066 - mean_q: 59.706 Interval 1697 (848000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8910 3 episodes - episode_reward: -175.306 [-293.947, -34.068] - loss: 19.685 - mae: 48.152 - mean_q: 59.111 Interval 1698 (848500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8504 3 episodes - episode_reward: -157.074 [-190.458, -122.125] - loss: 19.529 - mae: 47.990 - mean_q: 60.004 Interval 1699 (849000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1189 Interval 1700 (849500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8580 2 episodes - episode_reward: -231.231 [-231.803, -230.659] - loss: 21.518 - mae: 47.526 - mean_q: 58.684 Interval 1701 (850000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0660 Interval 1702 (850500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6208 2 episodes - episode_reward: -419.200 [-420.150, -418.249] - loss: 17.491 - mae: 47.445 - mean_q: 59.035 
Interval 1703 (851000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2544 Interval 1704 (851500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6730 1 episodes - episode_reward: -903.284 [-903.284, -903.284] - loss: 17.870 - mae: 47.887 - mean_q: 59.947 Interval 1705 (852000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3076 3 episodes - episode_reward: -231.931 [-327.497, -155.773] - loss: 22.689 - mae: 48.072 - mean_q: 59.335 Interval 1706 (852500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0661 2 episodes - episode_reward: -283.278 [-417.664, -148.892] - loss: 23.634 - mae: 47.956 - mean_q: 58.864 Interval 1707 (853000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4462 2 episodes - episode_reward: -359.923 [-365.163, -354.683] - loss: 25.797 - mae: 48.362 - mean_q: 59.851 Interval 1708 (853500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0837 Interval 1709 (854000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.5859 2 episodes - episode_reward: -524.876 [-527.680, -522.072] - loss: 20.889 - mae: 48.071 - mean_q: 59.220 Interval 1710 (854500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7113 3 episodes - episode_reward: -204.926 [-290.754, -161.796] - loss: 22.740 - mae: 47.788 - mean_q: 58.646 Interval 1711 (855000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0956 2 episodes - episode_reward: -282.777 [-365.236, -200.318] - loss: 22.895 - mae: 48.007 - mean_q: 58.151 Interval 1712 (855500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3589 2 episodes - episode_reward: -347.578 [-528.856, -166.299] - loss: 23.817 - mae: 48.250 - mean_q: 57.946 Interval 1713 (856000 steps performed) 
500/500 [==============================] - 4s 7ms/step - reward: -1.3926 2 episodes - episode_reward: -299.388 [-444.436, -154.339] - loss: 21.663 - mae: 48.051 - mean_q: 57.006 Interval 1714 (856500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8664 2 episodes - episode_reward: -281.080 [-390.621, -171.538] - loss: 23.816 - mae: 48.114 - mean_q: 56.488 Interval 1715 (857000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0838 2 episodes - episode_reward: -244.709 [-396.081, -93.336] - loss: 20.309 - mae: 47.909 - mean_q: 56.054 Interval 1716 (857500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4734 3 episodes - episode_reward: -261.695 [-419.621, -122.200] - loss: 23.862 - mae: 47.854 - mean_q: 56.041 Interval 1717 (858000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4434 Interval 1718 (858500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2729 3 episodes - episode_reward: -144.865 [-327.628, -20.241] - loss: 26.775 - mae: 47.248 - mean_q: 55.627 Interval 1719 (859000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0250 Interval 1720 (859500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1717 2 episodes - episode_reward: -0.510 [-238.106, 237.086] - loss: 23.438 - mae: 46.679 - mean_q: 55.054 Interval 1721 (860000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3925 3 episodes - episode_reward: -208.862 [-394.537, -108.172] - loss: 31.232 - mae: 46.327 - mean_q: 55.035 Interval 1722 (860500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2119 3 episodes - episode_reward: -211.267 [-289.867, -108.289] - loss: 22.793 - mae: 45.967 - mean_q: 54.872 Interval 1723 (861000 steps performed) 500/500 [==============================] - 4s 
7ms/step - reward: -0.4117 2 episodes - episode_reward: -173.030 [-250.615, -95.446] - loss: 25.185 - mae: 46.084 - mean_q: 55.241 Interval 1724 (861500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1746 1 episodes - episode_reward: 230.999 [230.999, 230.999] - loss: 27.321 - mae: 46.297 - mean_q: 54.662 Interval 1725 (862000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7030 2 episodes - episode_reward: -167.505 [-256.122, -78.889] - loss: 25.583 - mae: 46.113 - mean_q: 55.730 Interval 1726 (862500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7093 5 episodes - episode_reward: -176.934 [-334.836, -53.937] - loss: 20.795 - mae: 46.699 - mean_q: 56.379 Interval 1727 (863000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1938 2 episodes - episode_reward: -271.056 [-307.702, -234.409] - loss: 23.535 - mae: 46.680 - mean_q: 56.933 Interval 1728 (863500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0444 3 episodes - episode_reward: -188.780 [-334.900, -23.840] - loss: 27.444 - mae: 46.720 - mean_q: 56.543 Interval 1729 (864000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1065 1 episodes - episode_reward: -51.581 [-51.581, -51.581] - loss: 22.646 - mae: 46.429 - mean_q: 56.222 Interval 1730 (864500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4027 3 episodes - episode_reward: -35.992 [-131.386, 142.131] - loss: 25.052 - mae: 46.858 - mean_q: 56.861 Interval 1731 (865000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1562 3 episodes - episode_reward: -201.685 [-312.807, -123.058] - loss: 20.980 - mae: 46.897 - mean_q: 57.167 Interval 1732 (865500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0114 1 episodes - episode_reward: 
-220.706 [-220.706, -220.706] - loss: 22.665 - mae: 46.486 - mean_q: 56.690 Interval 1733 (866000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1218 Interval 1734 (866500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7828 3 episodes - episode_reward: -261.135 [-413.650, -173.966] - loss: 26.247 - mae: 46.985 - mean_q: 56.825 Interval 1735 (867000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3718 1 episodes - episode_reward: -97.116 [-97.116, -97.116] - loss: 23.939 - mae: 46.741 - mean_q: 56.159 Interval 1736 (867500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4758 1 episodes - episode_reward: -316.810 [-316.810, -316.810] - loss: 25.493 - mae: 46.862 - mean_q: 56.936 Interval 1737 (868000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2808 1 episodes - episode_reward: -210.373 [-210.373, -210.373] - loss: 26.682 - mae: 46.888 - mean_q: 56.723 Interval 1738 (868500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0797 3 episodes - episode_reward: -294.858 [-538.073, -100.000] - loss: 25.312 - mae: 47.595 - mean_q: 56.476 Interval 1739 (869000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7631 4 episodes - episode_reward: -251.446 [-402.067, -113.788] - loss: 33.400 - mae: 47.927 - mean_q: 56.147 Interval 1740 (869500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1722 3 episodes - episode_reward: -14.085 [-151.599, 217.961] - loss: 31.706 - mae: 48.296 - mean_q: 56.730 Interval 1741 (870000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5223 3 episodes - episode_reward: -238.325 [-347.125, -65.929] - loss: 52.002 - mae: 48.651 - mean_q: 58.073 Interval 1742 (870500 steps performed) 500/500 [==============================] - 
4s 7ms/step - reward: -0.8347 3 episodes - episode_reward: -206.686 [-328.704, -100.000] - loss: 207.494 - mae: 49.492 - mean_q: 60.130 Interval 1743 (871000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1722 Interval 1744 (871500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5239 2 episodes - episode_reward: -359.630 [-416.038, -303.221] - loss: 43.360 - mae: 51.709 - mean_q: 63.118 Interval 1745 (872000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.0991 7 episodes - episode_reward: -298.228 [-463.433, -114.193] - loss: 58.290 - mae: 53.139 - mean_q: 64.852 Interval 1746 (872500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2421 Interval 1747 (873000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8465 5 episodes - episode_reward: -170.424 [-300.237, -100.000] - loss: 47.288 - mae: 53.203 - mean_q: 63.110 Interval 1748 (873500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8544 3 episodes - episode_reward: -356.413 [-469.205, -140.480] - loss: 71.462 - mae: 52.866 - mean_q: 62.211 Interval 1749 (874000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6242 1 episodes - episode_reward: -412.186 [-412.186, -412.186] - loss: 33.400 - mae: 54.037 - mean_q: 64.872 Interval 1750 (874500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0192 2 episodes - episode_reward: -391.793 [-586.464, -197.121] - loss: 73.277 - mae: 54.101 - mean_q: 64.036 Interval 1751 (875000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5839 2 episodes - episode_reward: -235.705 [-242.416, -228.993] - loss: 52.877 - mae: 54.506 - mean_q: 64.376 Interval 1752 (875500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.2576 2 
episodes - episode_reward: -781.207 [-1157.477, -404.938] - loss: 50.037 - mae: 54.032 - mean_q: 62.761 Interval 1753 (876000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8634 1 episodes - episode_reward: -481.098 [-481.098, -481.098] - loss: 56.106 - mae: 53.274 - mean_q: 60.316 Interval 1754 (876500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0776 Interval 1755 (877000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.3431 Interval 1756 (877500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0378 Interval 1757 (878000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2596 1 episodes - episode_reward: -240.730 [-240.730, -240.730] - loss: 40.029 - mae: 51.485 - mean_q: 56.339 Interval 1758 (878500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7094 1 episodes - episode_reward: -311.975 [-311.975, -311.975] - loss: 42.372 - mae: 51.861 - mean_q: 56.919 Interval 1759 (879000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8754 2 episodes - episode_reward: -259.485 [-295.908, -223.062] - loss: 41.247 - mae: 51.947 - mean_q: 56.998 Interval 1760 (879500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5112 1 episodes - episode_reward: -253.112 [-253.112, -253.112] - loss: 43.343 - mae: 51.993 - mean_q: 56.929 Interval 1761 (880000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2572 1 episodes - episode_reward: -107.112 [-107.112, -107.112] - loss: 31.712 - mae: 52.129 - mean_q: 57.215 Interval 1762 (880500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1060 Interval 1763 (881000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1745 1 episodes - episode_reward: 
160.036 [160.036, 160.036] - loss: 38.793 - mae: 52.577 - mean_q: 57.611 Interval 1764 (881500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5114 2 episodes - episode_reward: -471.603 [-859.888, -83.319] - loss: 45.705 - mae: 52.684 - mean_q: 59.161 Interval 1765 (882000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5507 2 episodes - episode_reward: -396.501 [-417.534, -375.469] - loss: 43.929 - mae: 53.611 - mean_q: 59.505 Interval 1766 (882500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4513 1 episodes - episode_reward: -215.737 [-215.737, -215.737] - loss: 31.400 - mae: 53.801 - mean_q: 60.212 Interval 1767 (883000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0800 Interval 1768 (883500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1154 2 episodes - episode_reward: 20.016 [-69.383, 109.415] - loss: 39.368 - mae: 54.982 - mean_q: 60.932 Interval 1769 (884000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1192 2 episodes - episode_reward: -12.361 [-232.237, 207.514] - loss: 39.637 - mae: 55.300 - mean_q: 61.579 Interval 1770 (884500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1157 Interval 1771 (885000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1244 5 episodes - episode_reward: -113.799 [-351.968, 159.191] - loss: 56.672 - mae: 56.430 - mean_q: 63.881 Interval 1772 (885500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7312 1 episodes - episode_reward: -386.979 [-386.979, -386.979] - loss: 43.852 - mae: 57.235 - mean_q: 65.160 Interval 1773 (886000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1398 1 episodes - episode_reward: -16.132 [-16.132, -16.132] - loss: 53.429 - mae: 
58.077 - mean_q: 65.539 Interval 1774 (886500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7918 1 episodes - episode_reward: -399.025 [-399.025, -399.025] - loss: 42.827 - mae: 57.532 - mean_q: 66.111 Interval 1775 (887000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1847 1 episodes - episode_reward: -71.243 [-71.243, -71.243] - loss: 32.123 - mae: 56.606 - mean_q: 65.304 Interval 1776 (887500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1475 Interval 1777 (888000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1642 Interval 1778 (888500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0236 Interval 1779 (889000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0373 2 episodes - episode_reward: -14.521 [-107.044, 78.002] - loss: 34.758 - mae: 55.924 - mean_q: 64.802 Interval 1780 (889500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1111 Interval 1781 (890000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2341 Interval 1782 (890500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0956 6 episodes - episode_reward: -181.289 [-339.712, 15.986] - loss: 31.990 - mae: 53.824 - mean_q: 60.558 Interval 1783 (891000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1085 Interval 1784 (891500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3291 2 episodes - episode_reward: 34.526 [-56.547, 125.598] - loss: 36.939 - mae: 53.355 - mean_q: 58.722 Interval 1785 (892000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4437 1 episodes - episode_reward: -81.884 [-81.884, -81.884] - loss: 32.395 - mae: 52.867 - mean_q: 58.234 Interval 1786 
(892500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0333 1 episodes - episode_reward: -71.562 [-71.562, -71.562] - loss: 36.360 - mae: 52.370 - mean_q: 56.696 Interval 1787 (893000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0895 3 episodes - episode_reward: 19.157 [-87.020, 170.242] - loss: 33.828 - mae: 51.227 - mean_q: 57.719 Interval 1788 (893500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1207 1 episodes - episode_reward: -115.287 [-115.287, -115.287] - loss: 40.626 - mae: 51.777 - mean_q: 57.066 Interval 1789 (894000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0302 Interval 1790 (894500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1048 1 episodes - episode_reward: -36.252 [-36.252, -36.252] - loss: 43.402 - mae: 51.224 - mean_q: 57.440 Interval 1791 (895000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0506 Interval 1792 (895500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.3123 Interval 1793 (896000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1637 Interval 1794 (896500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.3218 1 episodes - episode_reward: -405.347 [-405.347, -405.347] - loss: 31.303 - mae: 50.832 - mean_q: 55.513 Interval 1795 (897000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.3921 2 episodes - episode_reward: -882.210 [-1635.836, -128.583] - loss: 31.177 - mae: 50.740 - mean_q: 56.940 Interval 1796 (897500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4170 1 episodes - episode_reward: 208.477 [208.477, 208.477] - loss: 35.562 - mae: 50.921 - mean_q: 56.739 Interval 1797 (898000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -0.1396 Interval 1798 (898500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1219 1 episodes - episode_reward: -150.283 [-150.283, -150.283] - loss: 29.657 - mae: 50.761 - mean_q: 55.819 Interval 1799 (899000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2444 Interval 1800 (899500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5070 2 episodes - episode_reward: -147.841 [-168.989, -126.692] - loss: 37.239 - mae: 50.743 - mean_q: 55.153 Interval 1801 (900000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1134 1 episodes - episode_reward: -116.290 [-116.290, -116.290] - loss: 30.749 - mae: 50.295 - mean_q: 54.146 Interval 1802 (900500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6209 2 episodes - episode_reward: -152.576 [-212.213, -92.940] - loss: 33.470 - mae: 49.900 - mean_q: 56.289 Interval 1803 (901000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0891 Interval 1804 (901500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2632 Interval 1805 (902000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.9093 3 episodes - episode_reward: -196.487 [-346.132, -115.258] - loss: 51.038 - mae: 49.500 - mean_q: 55.314 Interval 1806 (902500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2978 2 episodes - episode_reward: -71.647 [-115.596, -27.697] - loss: 28.557 - mae: 49.466 - mean_q: 55.311 Interval 1807 (903000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9727 1 episodes - episode_reward: -160.499 [-160.499, -160.499] - loss: 32.831 - mae: 49.274 - mean_q: 55.822 Interval 1808 (903500 steps performed) 500/500 [==============================] - 
4s 7ms/step - reward: -1.8594 3 episodes - episode_reward: -416.972 [-905.749, -122.614] - loss: 38.016 - mae: 49.022 - mean_q: 54.037 Interval 1809 (904000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5427 1 episodes - episode_reward: -201.073 [-201.073, -201.073] - loss: 45.524 - mae: 47.803 - mean_q: 53.703 Interval 1810 (904500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5979 2 episodes - episode_reward: -202.247 [-203.605, -200.888] - loss: 43.759 - mae: 46.895 - mean_q: 52.529 Interval 1811 (905000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0515 Interval 1812 (905500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0458 1 episodes - episode_reward: 26.534 [26.534, 26.534] - loss: 27.709 - mae: 46.064 - mean_q: 51.745 Interval 1813 (906000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8510 3 episodes - episode_reward: -159.427 [-310.718, -51.761] - loss: 28.528 - mae: 45.641 - mean_q: 49.966 Interval 1814 (906500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8017 2 episodes - episode_reward: -157.792 [-220.625, -94.959] - loss: 29.785 - mae: 45.372 - mean_q: 48.662 Interval 1815 (907000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3565 1 episodes - episode_reward: -218.999 [-218.999, -218.999] - loss: 26.520 - mae: 45.693 - mean_q: 48.474 Interval 1816 (907500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0541 Interval 1817 (908000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0329 2 episodes - episode_reward: 10.360 [-151.971, 172.691] - loss: 29.602 - mae: 43.924 - mean_q: 46.547 Interval 1818 (908500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7228 3 episodes - 
episode_reward: -126.182 [-164.073, -104.131] - loss: 29.343 - mae: 43.815 - mean_q: 45.677 Interval 1819 (909000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7130 5 episodes - episode_reward: -163.370 [-349.424, -92.825] - loss: 24.540 - mae: 43.143 - mean_q: 45.210 Interval 1820 (909500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4569 1 episodes - episode_reward: -230.554 [-230.554, -230.554] - loss: 23.942 - mae: 42.989 - mean_q: 43.845 Interval 1821 (910000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4161 4 episodes - episode_reward: -179.810 [-273.133, -72.725] - loss: 25.213 - mae: 42.883 - mean_q: 43.939 Interval 1822 (910500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6991 1 episodes - episode_reward: -396.432 [-396.432, -396.432] - loss: 24.067 - mae: 42.053 - mean_q: 42.788 Interval 1823 (911000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8379 1 episodes - episode_reward: -484.439 [-484.439, -484.439] - loss: 27.442 - mae: 42.370 - mean_q: 43.459 Interval 1824 (911500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4375 2 episodes - episode_reward: -79.513 [-105.831, -53.195] - loss: 26.334 - mae: 42.801 - mean_q: 43.077 Interval 1825 (912000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8469 3 episodes - episode_reward: -182.317 [-360.639, -54.709] - loss: 25.856 - mae: 43.345 - mean_q: 43.430 Interval 1826 (912500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0284 Interval 1827 (913000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1071 Interval 1828 (913500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0167 Interval 1829 (914000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: 0.6691 2 episodes - episode_reward: 212.186 [194.386, 229.985] - loss: 25.554 - mae: 44.538 - mean_q: 43.905 Interval 1830 (914500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0715 4 episodes - episode_reward: -260.664 [-571.733, -43.479] - loss: 27.588 - mae: 44.569 - mean_q: 43.887 Interval 1831 (915000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2616 2 episodes - episode_reward: -56.115 [-61.617, -50.613] - loss: 30.708 - mae: 44.994 - mean_q: 44.553 Interval 1832 (915500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3319 1 episodes - episode_reward: -118.896 [-118.896, -118.896] - loss: 34.751 - mae: 45.881 - mean_q: 46.019 Interval 1833 (916000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1720 Interval 1834 (916500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.0026 Interval 1835 (917000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.5647 1 episodes - episode_reward: -406.498 [-406.498, -406.498] - loss: 24.767 - mae: 46.067 - mean_q: 47.788 Interval 1836 (917500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1362 Interval 1837 (918000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6477 2 episodes - episode_reward: -194.406 [-274.840, -113.972] - loss: 29.002 - mae: 46.129 - mean_q: 47.285 Interval 1838 (918500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1356 Interval 1839 (919000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3587 1 episodes - episode_reward: -241.717 [-241.717, -241.717] - loss: 25.694 - mae: 46.423 - mean_q: 49.016 Interval 1840 (919500 steps performed) 500/500 [==============================] - 4s 
8ms/step - reward: -0.1781 Interval 1841 (920000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0484 Interval 1842 (920500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.3658 Interval 1843 (921000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1819 Interval 1844 (921500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1871 Interval 1845 (922000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2188 Interval 1846 (922500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -1.0172 1 episodes - episode_reward: -1129.873 [-1129.873, -1129.873] - loss: 29.775 - mae: 50.103 - mean_q: 56.886 Interval 1847 (923000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9242 Interval 1848 (923500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6706 2 episodes - episode_reward: -326.383 [-555.161, -97.606] - loss: 33.720 - mae: 51.574 - mean_q: 58.513 Interval 1849 (924000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6758 2 episodes - episode_reward: -163.040 [-226.079, -100.000] - loss: 32.851 - mae: 52.289 - mean_q: 59.865 Interval 1850 (924500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0755 1 episodes - episode_reward: -211.238 [-211.238, -211.238] - loss: 28.598 - mae: 53.242 - mean_q: 60.970 Interval 1851 (925000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0516 Interval 1852 (925500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6942 4 episodes - episode_reward: -86.292 [-125.445, -24.863] - loss: 38.842 - mae: 54.180 - mean_q: 63.679 Interval 1853 (926000 steps performed) 500/500 [==============================] - 4s 7ms/step - 
reward: 0.0574 Interval 1854 (926500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2415 Interval 1855 (927000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9143 1 episodes - episode_reward: -511.886 [-511.886, -511.886] - loss: 39.772 - mae: 56.995 - mean_q: 69.194 Interval 1856 (927500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0749 Interval 1857 (928000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1770 Interval 1858 (928500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1327 Interval 1859 (929000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3890 1 episodes - episode_reward: -359.562 [-359.562, -359.562] - loss: 53.325 - mae: 60.488 - mean_q: 74.013 Interval 1860 (929500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9729 1 episodes - episode_reward: -536.799 [-536.799, -536.799] - loss: 42.545 - mae: 61.395 - mean_q: 75.258 Interval 1861 (930000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1887 1 episodes - episode_reward: -147.232 [-147.232, -147.232] - loss: 44.959 - mae: 63.241 - mean_q: 78.028 Interval 1862 (930500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2409 1 episodes - episode_reward: -89.662 [-89.662, -89.662] - loss: 49.380 - mae: 63.316 - mean_q: 78.679 Interval 1863 (931000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4604 2 episodes - episode_reward: -118.211 [-155.908, -80.515] - loss: 49.695 - mae: 65.235 - mean_q: 80.465 Interval 1864 (931500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3766 1 episodes - episode_reward: -680.997 [-680.997, -680.997] - loss: 44.204 - mae: 66.472 - mean_q: 83.176 Interval 1865 (932000 
steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6816 2 episodes - episode_reward: -169.160 [-176.086, -162.235] - loss: 67.391 - mae: 66.604 - mean_q: 83.488 Interval 1866 (932500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9643 3 episodes - episode_reward: -174.443 [-202.651, -123.301] - loss: 61.141 - mae: 68.232 - mean_q: 85.960 Interval 1867 (933000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5807 2 episodes - episode_reward: -75.921 [-164.634, 12.792] - loss: 54.344 - mae: 67.272 - mean_q: 84.282 Interval 1868 (933500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4463 3 episodes - episode_reward: -206.275 [-245.413, -154.948] - loss: 48.928 - mae: 65.471 - mean_q: 82.260 Interval 1869 (934000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 2.8996e-04 1 episodes - episode_reward: -300.200 [-300.200, -300.200] - loss: 46.406 - mae: 64.880 - mean_q: 82.023 Interval 1870 (934500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0674 Interval 1871 (935000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3805 7 episodes - episode_reward: -89.167 [-183.359, 166.438] - loss: 29.413 - mae: 63.974 - mean_q: 80.210 Interval 1872 (935500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9235 1 episodes - episode_reward: -944.205 [-944.205, -944.205] - loss: 41.448 - mae: 64.288 - mean_q: 80.711 Interval 1873 (936000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1935 Interval 1874 (936500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0258 1 episodes - episode_reward: 152.277 [152.277, 152.277] - loss: 35.319 - mae: 63.416 - mean_q: 79.000 Interval 1875 (937000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -0.5053 2 episodes - episode_reward: -165.900 [-209.740, -122.061] - loss: 84.929 - mae: 62.912 - mean_q: 78.459 Interval 1876 (937500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4661 3 episodes - episode_reward: -260.756 [-298.785, -224.863] - loss: 53.264 - mae: 61.518 - mean_q: 76.917 Interval 1877 (938000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5827 2 episodes - episode_reward: -467.825 [-588.118, -347.531] - loss: 35.239 - mae: 60.855 - mean_q: 75.702 Interval 1878 (938500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2081 Interval 1879 (939000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0069 Interval 1880 (939500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3310 1 episodes - episode_reward: -77.003 [-77.003, -77.003] - loss: 32.154 - mae: 59.833 - mean_q: 73.760 Interval 1881 (940000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0888 1 episodes - episode_reward: -84.618 [-84.618, -84.618] - loss: 38.550 - mae: 59.376 - mean_q: 73.083 Interval 1882 (940500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2736 1 episodes - episode_reward: -60.900 [-60.900, -60.900] - loss: 29.551 - mae: 58.710 - mean_q: 72.471 Interval 1883 (941000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4822 1 episodes - episode_reward: -620.466 [-620.466, -620.466] - loss: 35.273 - mae: 58.212 - mean_q: 71.969 Interval 1884 (941500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2761 1 episodes - episode_reward: -261.730 [-261.730, -261.730] - loss: 33.143 - mae: 57.504 - mean_q: 71.091 Interval 1885 (942000 steps performed) 500/500 [==============================] - 4s 7ms/step - 
reward: -0.8975 2 episodes - episode_reward: -182.156 [-203.299, -161.013] - loss: 25.734 - mae: 57.457 - mean_q: 71.380 Interval 1886 (942500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4813 5 episodes - episode_reward: -154.733 [-248.709, -40.266] - loss: 28.165 - mae: 57.584 - mean_q: 71.409 Interval 1887 (943000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0975 4 episodes - episode_reward: -158.321 [-209.192, -73.473] - loss: 30.378 - mae: 57.782 - mean_q: 71.029 Interval 1888 (943500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4732 3 episodes - episode_reward: -246.435 [-427.415, -126.022] - loss: 31.077 - mae: 57.813 - mean_q: 70.836 Interval 1889 (944000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2292 2 episodes - episode_reward: -50.039 [-94.435, -5.644] - loss: 28.779 - mae: 58.798 - mean_q: 72.421 Interval 1890 (944500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5800 3 episodes - episode_reward: -253.699 [-487.007, -132.624] - loss: 37.810 - mae: 59.091 - mean_q: 71.195 Interval 1891 (945000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.8344 4 episodes - episode_reward: -248.295 [-553.049, -96.616] - loss: 29.717 - mae: 59.374 - mean_q: 71.118 Interval 1892 (945500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7799 3 episodes - episode_reward: -443.769 [-784.072, -247.654] - loss: 34.910 - mae: 60.013 - mean_q: 71.093 Interval 1893 (946000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7555 Interval 1894 (946500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3835 4 episodes - episode_reward: -376.472 [-553.775, -197.577] - loss: 45.303 - mae: 61.826 - mean_q: 72.150 Interval 1895 (947000 steps 
performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8207 3 episodes - episode_reward: -158.631 [-243.465, -83.004] - loss: 36.860 - mae: 63.784 - mean_q: 75.756 Interval 1896 (947500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7918 3 episodes - episode_reward: -309.287 [-645.214, -123.141] - loss: 60.696 - mae: 65.613 - mean_q: 77.524 Interval 1897 (948000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5538 1 episodes - episode_reward: -247.405 [-247.405, -247.405] - loss: 84.509 - mae: 67.074 - mean_q: 79.039 Interval 1898 (948500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5632 2 episodes - episode_reward: -320.895 [-362.672, -279.118] - loss: 38.930 - mae: 66.875 - mean_q: 78.745 Interval 1899 (949000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.2293 4 episodes - episode_reward: -314.829 [-481.654, -134.780] - loss: 39.009 - mae: 69.018 - mean_q: 81.917 Interval 1900 (949500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3712 2 episodes - episode_reward: -357.905 [-396.688, -319.122] - loss: 49.558 - mae: 70.595 - mean_q: 83.385 Interval 1901 (950000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2432 Interval 1902 (950500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3437 2 episodes - episode_reward: -398.145 [-494.423, -301.868] - loss: 66.144 - mae: 74.122 - mean_q: 89.548 Interval 1903 (951000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1078 Interval 1904 (951500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8227 4 episodes - episode_reward: -240.289 [-406.634, -107.239] - loss: 64.880 - mae: 76.235 - mean_q: 94.722 Interval 1905 (952000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: 0.0494 1 episodes - episode_reward: -74.996 [-74.996, -74.996] - loss: 320.945 - mae: 77.093 - mean_q: 96.580 Interval 1906 (952500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.8798 2 episodes - episode_reward: -184.499 [-268.998, -100.000] - loss: 89.646 - mae: 80.407 - mean_q: 101.436 Interval 1907 (953000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1699 1 episodes - episode_reward: -98.138 [-98.138, -98.138] - loss: 85.759 - mae: 81.133 - mean_q: 103.300 Interval 1908 (953500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4287 Interval 1909 (954000 steps performed) 500/500 [==============================] - 2032s 4s/step - reward: -3.9847 4 episodes - episode_reward: -560.820 [-896.410, -100.000] - loss: 98.112 - mae: 83.004 - mean_q: 105.008 Interval 1910 (954500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5166 3 episodes - episode_reward: -217.912 [-339.259, -139.530] - loss: 101.214 - mae: 85.294 - mean_q: 108.024 Interval 1911 (955000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1318 2 episodes - episode_reward: -302.162 [-377.348, -226.975] - loss: 295.879 - mae: 85.891 - mean_q: 108.865 Interval 1912 (955500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1414 1 episodes - episode_reward: -499.285 [-499.285, -499.285] - loss: 310.834 - mae: 86.359 - mean_q: 110.217 Interval 1913 (956000 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -0.5975 1 episodes - episode_reward: -387.921 [-387.921, -387.921] - loss: 294.021 - mae: 89.422 - mean_q: 116.054 Interval 1914 (956500 steps performed) 500/500 [==============================] - 16s 31ms/step - reward: -2.3711 4 episodes - episode_reward: -269.215 [-505.209, -165.797] - loss: 468.980 
- mae: 93.429 - mean_q: 122.033 Interval 1915 (957000 steps performed) 500/500 [==============================] - 16s 31ms/step - reward: -0.0692 1 episodes - episode_reward: -116.452 [-116.452, -116.452] - loss: 279.504 - mae: 94.383 - mean_q: 123.959 Interval 1916 (957500 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -1.0977 3 episodes - episode_reward: -138.049 [-183.413, -100.000] - loss: 355.492 - mae: 96.269 - mean_q: 126.657 Interval 1917 (958000 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -2.0711 2 episodes - episode_reward: -519.031 [-793.042, -245.020] - loss: 430.907 - mae: 97.911 - mean_q: 128.575 Interval 1918 (958500 steps performed) 500/500 [==============================] - 16s 31ms/step - reward: -5.0004 3 episodes - episode_reward: -620.068 [-1394.602, -61.300] - loss: 170.622 - mae: 100.590 - mean_q: 132.098 Interval 1919 (959000 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -2.9447 4 episodes - episode_reward: -530.135 [-865.541, -376.660] - loss: 1030.882 - mae: 99.498 - mean_q: 130.252 Interval 1920 (959500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.1905 2 episodes - episode_reward: -351.178 [-456.589, -245.767] - loss: 368.114 - mae: 101.437 - mean_q: 132.287 Interval 1921 (960000 steps performed) 500/500 [==============================] - 17s 34ms/step - reward: -1.6675 4 episodes - episode_reward: -210.182 [-288.876, -117.354] - loss: 372.211 - mae: 100.323 - mean_q: 131.586 Interval 1922 (960500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.1332 3 episodes - episode_reward: -339.386 [-647.037, -142.956] - loss: 549.771 - mae: 103.328 - mean_q: 135.042 Interval 1923 (961000 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -1.1893 Interval 1924 (961500 steps performed) 500/500 [==============================] 
- 15s 30ms/step - reward: -4.4458 7 episodes - episode_reward: -404.993 [-743.222, -144.667] - loss: 308.427 - mae: 104.683 - mean_q: 136.390 Interval 1925 (962000 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -4.4435 4 episodes - episode_reward: -560.466 [-1055.814, -185.307] - loss: 115.590 - mae: 107.224 - mean_q: 139.358 Interval 1926 (962500 steps performed) 500/500 [==============================] - 16s 31ms/step - reward: -3.6447 4 episodes - episode_reward: -441.744 [-733.809, -112.349] - loss: 554.503 - mae: 107.519 - mean_q: 139.168 Interval 1927 (963000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -5.2900 5 episodes - episode_reward: -522.044 [-1011.740, -100.000] - loss: 241.281 - mae: 108.721 - mean_q: 139.952 Interval 1928 (963500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -5.5552 5 episodes - episode_reward: -581.497 [-1042.277, -161.538] - loss: 637.553 - mae: 107.222 - mean_q: 136.923 Interval 1929 (964000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -3.6867 4 episodes - episode_reward: -457.983 [-747.039, -184.297] - loss: 169.810 - mae: 109.707 - mean_q: 139.838 Interval 1930 (964500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.0440 2 episodes - episode_reward: -298.943 [-354.561, -243.326] - loss: 394.880 - mae: 108.472 - mean_q: 138.255 Interval 1931 (965000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.2650 6 episodes - episode_reward: -254.698 [-531.797, -100.000] - loss: 425.244 - mae: 106.322 - mean_q: 134.843 Interval 1932 (965500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -3.7800 3 episodes - episode_reward: -631.681 [-643.737, -623.202] - loss: 183.113 - mae: 105.145 - mean_q: 132.962 Interval 1933 (966000 steps performed) 500/500 
[==============================] - 16s 31ms/step - reward: -0.5044 1 episodes - episode_reward: -201.322 [-201.322, -201.322] - loss: 153.973 - mae: 107.439 - mean_q: 135.225 Interval 1934 (966500 steps performed) 500/500 [==============================] - 18s 36ms/step - reward: -0.4571 1 episodes - episode_reward: -332.832 [-332.832, -332.832] - loss: 193.754 - mae: 107.784 - mean_q: 135.545 Interval 1935 (967000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.6402 3 episodes - episode_reward: -258.151 [-549.530, 35.664] - loss: 199.474 - mae: 109.316 - mean_q: 137.487 Interval 1936 (967500 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -2.9932 1 episodes - episode_reward: -910.067 [-910.067, -910.067] - loss: 173.760 - mae: 108.721 - mean_q: 136.184 Interval 1937 (968000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.8691 4 episodes - episode_reward: -374.012 [-776.666, -100.000] - loss: 776.433 - mae: 110.459 - mean_q: 138.202 Interval 1938 (968500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.1770 1 episodes - episode_reward: -625.473 [-625.473, -625.473] - loss: 102.693 - mae: 105.303 - mean_q: 132.107 Interval 1939 (969000 steps performed) 500/500 [==============================] - 17s 34ms/step - reward: -0.5071 Interval 1940 (969500 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -2.8765 2 episodes - episode_reward: -852.078 [-1331.473, -372.682] - loss: 243.980 - mae: 109.575 - mean_q: 136.423 Interval 1941 (970000 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -2.1320 1 episodes - episode_reward: -850.196 [-850.196, -850.196] - loss: 250.656 - mae: 111.283 - mean_q: 138.851 Interval 1942 (970500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -1.9048 2 episodes - episode_reward: -566.119 
[-812.919, -319.319] - loss: 226.854 - mae: 113.376 - mean_q: 141.966 Interval 1943 (971000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.3129 2 episodes - episode_reward: -271.243 [-345.628, -196.858] - loss: 134.950 - mae: 113.394 - mean_q: 140.634 Interval 1944 (971500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -1.8919 3 episodes - episode_reward: -355.124 [-431.492, -230.042] - loss: 460.031 - mae: 113.647 - mean_q: 140.431 Interval 1945 (972000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.3044 1 episodes - episode_reward: -473.325 [-473.325, -473.325] - loss: 149.075 - mae: 114.058 - mean_q: 141.338 Interval 1946 (972500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -2.7699 4 episodes - episode_reward: -326.328 [-456.430, -71.880] - loss: 201.363 - mae: 113.486 - mean_q: 140.372 Interval 1947 (973000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.0545 3 episodes - episode_reward: -245.520 [-362.467, -20.530] - loss: 811.292 - mae: 112.671 - mean_q: 139.093 Interval 1948 (973500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -3.6200 5 episodes - episode_reward: -371.033 [-529.109, -251.696] - loss: 320.307 - mae: 112.840 - mean_q: 138.384 Interval 1949 (974000 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -2.4145 4 episodes - episode_reward: -295.092 [-497.421, -171.333] - loss: 311.853 - mae: 113.157 - mean_q: 138.456 Interval 1950 (974500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -4.6471 7 episodes - episode_reward: -336.740 [-485.746, -137.361] - loss: 287.820 - mae: 112.158 - mean_q: 134.895 Interval 1951 (975000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.9646 4 episodes - episode_reward: -244.017 
[-388.169, -93.244] - loss: 269.935 - mae: 111.487 - mean_q: 132.577 Interval 1952 (975500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.4090 4 episodes - episode_reward: -275.171 [-306.070, -222.832] - loss: 298.180 - mae: 109.411 - mean_q: 128.787 Interval 1953 (976000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -5.0037 6 episodes - episode_reward: -295.118 [-408.341, -114.629] - loss: 282.557 - mae: 107.737 - mean_q: 126.350 Interval 1954 (976500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -3.6930 5 episodes - episode_reward: -540.367 [-1216.140, -295.816] - loss: 706.432 - mae: 109.186 - mean_q: 126.383 Interval 1955 (977000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.3244 5 episodes - episode_reward: -218.763 [-385.486, -100.000] - loss: 902.167 - mae: 101.184 - mean_q: 114.813 Interval 1956 (977500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -2.1172 4 episodes - episode_reward: -263.602 [-388.583, -156.828] - loss: 229.597 - mae: 97.635 - mean_q: 108.326 Interval 1957 (978000 steps performed) 500/500 [==============================] - 16s 31ms/step - reward: -2.6417 4 episodes - episode_reward: -297.747 [-583.082, -100.000] - loss: 513.886 - mae: 97.237 - mean_q: 106.912 Interval 1958 (978500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -1.0649 3 episodes - episode_reward: -224.249 [-294.173, -137.877] - loss: 315.165 - mae: 95.885 - mean_q: 103.748 Interval 1959 (979000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.4834 3 episodes - episode_reward: -427.262 [-859.103, -208.449] - loss: 264.541 - mae: 95.161 - mean_q: 103.758 Interval 1960 (979500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.7421 4 episodes - episode_reward: -219.303 
[-474.050, -22.096] - loss: 223.562 - mae: 94.445 - mean_q: 100.733 Interval 1961 (980000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.7405 2 episodes - episode_reward: -186.135 [-304.261, -68.009] - loss: 130.306 - mae: 92.498 - mean_q: 96.940 Interval 1962 (980500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.7401 5 episodes - episode_reward: -272.212 [-446.643, -151.680] - loss: 107.145 - mae: 90.828 - mean_q: 95.948 Interval 1963 (981000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.3184 2 episodes - episode_reward: -273.210 [-412.176, -134.244] - loss: 156.849 - mae: 90.466 - mean_q: 94.730 Interval 1964 (981500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -6.7528 5 episodes - episode_reward: -667.211 [-1263.135, -157.956] - loss: 164.065 - mae: 89.364 - mean_q: 93.783 Interval 1965 (982000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -4.1252 5 episodes - episode_reward: -395.442 [-1261.244, -2.287] - loss: 345.355 - mae: 88.205 - mean_q: 92.029 Interval 1966 (982500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -1.9918 2 episodes - episode_reward: -360.190 [-483.635, -236.744] - loss: 173.843 - mae: 87.269 - mean_q: 88.956 Interval 1967 (983000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.9873 4 episodes - episode_reward: -361.856 [-754.571, -105.773] - loss: 152.184 - mae: 86.139 - mean_q: 85.173 Interval 1968 (983500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -2.0855 4 episodes - episode_reward: -179.091 [-222.043, -152.165] - loss: 154.392 - mae: 83.777 - mean_q: 85.275 Interval 1969 (984000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.9559 5 episodes - episode_reward: -244.316 [-639.771, -35.131] 
- loss: 154.770 - mae: 80.889 - mean_q: 81.379 Interval 1970 (984500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -3.2909 4 episodes - episode_reward: -412.211 [-922.322, -159.203] - loss: 133.588 - mae: 79.021 - mean_q: 78.781 Interval 1971 (985000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -3.6918 7 episodes - episode_reward: -285.671 [-426.560, -166.554] - loss: 93.509 - mae: 77.208 - mean_q: 75.064 Interval 1972 (985500 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -2.3021 2 episodes - episode_reward: -578.088 [-755.708, -400.467] - loss: 88.078 - mae: 76.398 - mean_q: 72.212 Interval 1973 (986000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.2060 3 episodes - episode_reward: -299.726 [-360.914, -257.353] - loss: 85.926 - mae: 74.528 - mean_q: 68.399 Interval 1974 (986500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -3.2003 3 episodes - episode_reward: -602.561 [-1047.190, -247.033] - loss: 68.934 - mae: 73.283 - mean_q: 67.598 Interval 1975 (987000 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -1.5791 4 episodes - episode_reward: -146.215 [-211.722, -76.056] - loss: 107.805 - mae: 72.684 - mean_q: 67.661 Interval 1976 (987500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.3434 3 episodes - episode_reward: -424.424 [-437.067, -400.850] - loss: 75.559 - mae: 72.058 - mean_q: 65.718 Interval 1977 (988000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.7599 4 episodes - episode_reward: -350.783 [-526.615, -127.248] - loss: 78.967 - mae: 69.954 - mean_q: 62.702 Interval 1978 (988500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.8574 2 episodes - episode_reward: -404.777 [-461.929, -347.626] - loss: 67.025 - mae: 
68.600 - mean_q: 60.293 Interval 1979 (989000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.4678 2 episodes - episode_reward: -408.757 [-526.145, -291.370] - loss: 62.324 - mae: 68.305 - mean_q: 60.983 Interval 1980 (989500 steps performed) 500/500 [==============================] - 16s 31ms/step - reward: -1.0260 3 episodes - episode_reward: -152.847 [-282.041, -25.827] - loss: 59.366 - mae: 68.459 - mean_q: 59.872 Interval 1981 (990000 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -0.9993 3 episodes - episode_reward: -217.293 [-537.123, -21.183] - loss: 71.137 - mae: 68.547 - mean_q: 60.502 Interval 1982 (990500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.0248 5 episodes - episode_reward: -209.972 [-359.626, -93.635] - loss: 86.531 - mae: 68.956 - mean_q: 62.302 Interval 1983 (991000 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -1.7399 5 episodes - episode_reward: -156.354 [-196.446, -117.178] - loss: 67.068 - mae: 71.551 - mean_q: 65.335 Interval 1984 (991500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.6592 5 episodes - episode_reward: -166.222 [-202.776, -106.288] - loss: 73.580 - mae: 73.027 - mean_q: 65.590 Interval 1985 (992000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.7511 4 episodes - episode_reward: -233.882 [-362.222, -112.170] - loss: 76.927 - mae: 75.452 - mean_q: 65.159 Interval 1986 (992500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -1.1454 3 episodes - episode_reward: -166.548 [-321.122, -3.078] - loss: 85.446 - mae: 76.066 - mean_q: 67.199 Interval 1987 (993000 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -1.7363 4 episodes - episode_reward: -208.541 [-334.342, -135.137] - loss: 78.250 - mae: 75.948 - mean_q: 64.916 Interval 
1988 (993500 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -2.0471 4 episodes - episode_reward: -280.803 [-433.513, -165.775] - loss: 87.624 - mae: 76.182 - mean_q: 61.671 Interval 1989 (994000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.9745 3 episodes - episode_reward: -167.301 [-248.342, -90.546] - loss: 60.468 - mae: 76.205 - mean_q: 61.953 Interval 1990 (994500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.2678 4 episodes - episode_reward: -133.783 [-196.711, -73.298] - loss: 75.188 - mae: 76.751 - mean_q: 61.438 Interval 1991 (995000 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -1.9097 4 episodes - episode_reward: -247.352 [-389.067, -184.588] - loss: 64.486 - mae: 75.743 - mean_q: 57.447 Interval 1992 (995500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -2.3104 5 episodes - episode_reward: -226.253 [-505.889, -123.812] - loss: 57.820 - mae: 74.889 - mean_q: 57.130 Interval 1993 (996000 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -0.9010 4 episodes - episode_reward: -134.545 [-186.884, -50.401] - loss: 58.027 - mae: 73.935 - mean_q: 54.942 Interval 1994 (996500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.1987 4 episodes - episode_reward: -141.048 [-180.571, -98.041] - loss: 57.573 - mae: 73.130 - mean_q: 52.706 Interval 1995 (997000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.3517 5 episodes - episode_reward: -139.564 [-194.369, -96.377] - loss: 59.524 - mae: 72.907 - mean_q: 52.362 Interval 1996 (997500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -0.6198 2 episodes - episode_reward: -130.520 [-199.685, -61.356] - loss: 60.633 - mae: 72.812 - mean_q: 49.601 Interval 1997 (998000 steps performed) 
500/500 [==============================] - 15s 31ms/step - reward: -1.7169 6 episodes - episode_reward: -142.799 [-176.945, -114.373] - loss: 60.387 - mae: 71.891 - mean_q: 48.141 Interval 1998 (998500 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -0.5960 3 episodes - episode_reward: -102.927 [-157.435, -9.447] - loss: 50.929 - mae: 70.944 - mean_q: 46.463 Interval 1999 (999000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.8052 5 episodes - episode_reward: -186.081 [-226.002, -134.702] - loss: 52.954 - mae: 71.705 - mean_q: 46.392 Interval 2000 (999500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.5561 5 episodes - episode_reward: -136.172 [-276.209, -31.189] - loss: 54.658 - mae: 71.129 - mean_q: 44.876 Interval 2001 (1000000 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -1.0902 3 episodes - episode_reward: -199.970 [-377.231, -108.943] - loss: 54.726 - mae: 71.982 - mean_q: 44.603 Interval 2002 (1000500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.9346 3 episodes - episode_reward: -320.929 [-592.323, -179.145] - loss: 65.083 - mae: 71.924 - mean_q: 41.768 Interval 2003 (1001000 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -1.1361 4 episodes - episode_reward: -165.210 [-197.211, -100.000] - loss: 47.534 - mae: 70.810 - mean_q: 41.346 Interval 2004 (1001500 steps performed) 500/500 [==============================] - 16s 31ms/step - reward: -1.2326 2 episodes - episode_reward: -238.981 [-282.540, -195.422] - loss: 59.721 - mae: 71.172 - mean_q: 41.632 Interval 2005 (1002000 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -1.4720 2 episodes - episode_reward: -341.062 [-438.187, -243.936] - loss: 55.581 - mae: 70.372 - mean_q: 41.838 Interval 2006 (1002500 steps performed) 500/500 
[==============================] - 15s 30ms/step - reward: -1.1836 3 episodes - episode_reward: -218.906 [-316.987, -113.577] - loss: 50.742 - mae: 69.322 - mean_q: 40.289 Interval 2007 (1003000 steps performed) 500/500 [==============================] - 13s 25ms/step - reward: -1.9346 3 episodes - episode_reward: -265.665 [-356.324, -194.637] - loss: 78.040 - mae: 69.046 - mean_q: 37.710 Interval 2008 (1003500 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -5.9418 2 episodes - episode_reward: -1567.904 [-2903.421, -232.387] - loss: 76.702 - mae: 66.491 - mean_q: 34.749 Interval 2009 (1004000 steps performed) 500/500 [==============================] - 12s 25ms/step - reward: -1.9078 4 episodes - episode_reward: -249.053 [-333.984, -102.824] - loss: 59.490 - mae: 64.277 - mean_q: 33.051 Interval 2010 (1004500 steps performed) 500/500 [==============================] - 12s 24ms/step - reward: -1.8369 4 episodes - episode_reward: -238.067 [-385.021, -116.988] - loss: 40.058 - mae: 62.966 - mean_q: 32.017 Interval 2011 (1005000 steps performed) 500/500 [==============================] - 13s 25ms/step - reward: -1.1776 2 episodes - episode_reward: -305.119 [-500.318, -109.920] - loss: 44.603 - mae: 61.414 - mean_q: 29.904 Interval 2012 (1005500 steps performed) 500/500 [==============================] - 12s 25ms/step - reward: -1.4839 2 episodes - episode_reward: -146.284 [-151.855, -140.713] - loss: 54.921 - mae: 60.145 - mean_q: 28.890 Interval 2013 (1006000 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -1.2295 1 episodes - episode_reward: -846.479 [-846.479, -846.479] - loss: 51.642 - mae: 59.147 - mean_q: 26.552 Interval 2014 (1006500 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -1.3261 4 episodes - episode_reward: -226.698 [-336.801, -136.530] - loss: 41.726 - mae: 56.248 - mean_q: 24.715 Interval 2015 (1007000 steps performed) 500/500 
[==============================] - 12s 25ms/step - reward: -1.0302 Interval 2016 (1007500 steps performed) 500/500 [==============================] - 12s 25ms/step - reward: -0.4538 3 episodes - episode_reward: -199.181 [-263.364, -107.186] - loss: 43.544 - mae: 54.404 - mean_q: 23.147 Interval 2017 (1008000 steps performed) 500/500 [==============================] - 2086s 4s/step - reward: -0.6417 1 episodes - episode_reward: -517.856 [-517.856, -517.856] - loss: 44.217 - mae: 53.424 - mean_q: 22.905 Interval 2018 (1008500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8452 1 episodes - episode_reward: -358.626 [-358.626, -358.626] - loss: 36.831 - mae: 52.936 - mean_q: 22.626 Interval 2019 (1009000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2509 1 episodes - episode_reward: -586.196 [-586.196, -586.196] - loss: 39.688 - mae: 51.605 - mean_q: 22.968 Interval 2020 (1009500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -1.5315 2 episodes - episode_reward: -329.981 [-506.620, -153.342] - loss: 46.038 - mae: 49.860 - mean_q: 22.452 Interval 2021 (1010000 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -1.1982 1 episodes - episode_reward: -278.719 [-278.719, -278.719] - loss: 32.452 - mae: 49.295 - mean_q: 21.398 Interval 2022 (1010500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.5309 2 episodes - episode_reward: -356.989 [-469.575, -244.404] - loss: 41.425 - mae: 48.456 - mean_q: 21.618 Interval 2023 (1011000 steps performed) 500/500 [==============================] - 7211s 14s/step - reward: -0.8606 1 episodes - episode_reward: -219.772 [-219.772, -219.772] - loss: 39.609 - mae: 47.393 - mean_q: 19.618 Interval 2024 (1011500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8438 2 episodes - episode_reward: -295.208 [-371.774, -218.641] - 
loss: 44.267 - mae: 46.799 - mean_q: 19.762 Interval 2025 (1012000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0687 2 episodes - episode_reward: -257.690 [-415.365, -100.016] - loss: 34.216 - mae: 45.767 - mean_q: 19.504 Interval 2026 (1012500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4493 3 episodes - episode_reward: -263.377 [-459.585, -135.084] - loss: 38.055 - mae: 45.357 - mean_q: 20.868 Interval 2027 (1013000 steps performed) 500/500 [==============================] - 14s 29ms/step - reward: -1.0756 3 episodes - episode_reward: -150.906 [-209.192, -42.122] - loss: 30.587 - mae: 44.872 - mean_q: 20.713 Interval 2028 (1013500 steps performed) 500/500 [==============================] - 16s 33ms/step - reward: -0.3778 Interval 2029 (1014000 steps performed) 500/500 [==============================] - 7214s 14s/step - reward: -1.8965 3 episodes - episode_reward: -365.897 [-557.552, -120.503] - loss: 39.011 - mae: 44.901 - mean_q: 20.225 Interval 2030 (1014500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5572 3 episodes - episode_reward: -319.258 [-590.329, -142.424] - loss: 38.051 - mae: 44.738 - mean_q: 19.391 Interval 2031 (1015000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8551 1 episodes - episode_reward: -196.848 [-196.848, -196.848] - loss: 39.136 - mae: 44.424 - mean_q: 21.398 Interval 2032 (1015500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8801 3 episodes - episode_reward: -372.217 [-628.823, -184.515] - loss: 38.581 - mae: 43.785 - mean_q: 21.386 Interval 2033 (1016000 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -2.0820 3 episodes - episode_reward: -292.157 [-520.810, -127.724] - loss: 39.248 - mae: 44.351 - mean_q: 21.214 Interval 2034 (1016500 steps performed) 500/500 [==============================] - 16s 
32ms/step - reward: -1.7205 4 episodes - episode_reward: -253.202 [-324.289, -92.401] - loss: 47.528 - mae: 44.736 - mean_q: 22.520 Interval 2035 (1017000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.0186 3 episodes - episode_reward: -166.372 [-322.193, -52.441] - loss: 38.270 - mae: 44.822 - mean_q: 19.878 Interval 2036 (1017500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -1.7928 4 episodes - episode_reward: -229.278 [-372.843, -123.434] - loss: 37.978 - mae: 44.159 - mean_q: 21.135 Interval 2037 (1018000 steps performed) 500/500 [==============================] - 7207s 14s/step - reward: -1.4360 3 episodes - episode_reward: -238.806 [-339.959, -172.006] - loss: 47.729 - mae: 43.696 - mean_q: 19.627 Interval 2038 (1018500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8081 2 episodes - episode_reward: -186.209 [-262.987, -109.431] - loss: 38.566 - mae: 43.304 - mean_q: 19.359 Interval 2039 (1019000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8510 3 episodes - episode_reward: -131.309 [-186.133, -85.330] - loss: 32.018 - mae: 43.459 - mean_q: 18.517 Interval 2040 (1019500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.6146 4 episodes - episode_reward: -101.648 [-155.528, -29.710] - loss: 45.826 - mae: 43.844 - mean_q: 19.222 Interval 2041 (1020000 steps performed) 500/500 [==============================] - 15s 31ms/step - reward: -1.4051 2 episodes - episode_reward: -327.934 [-349.958, -305.911] - loss: 34.858 - mae: 43.275 - mean_q: 19.899 Interval 2042 (1020500 steps performed) 500/500 [==============================] - 16s 32ms/step - reward: -1.0959 2 episodes - episode_reward: -280.283 [-339.410, -221.155] - loss: 33.493 - mae: 43.502 - mean_q: 20.824 Interval 2043 (1021000 steps performed) 500/500 [==============================] - 194s 389ms/step - reward: 
-1.1469 2 episodes - episode_reward: -269.759 [-303.900, -235.617] - loss: 34.806 - mae: 43.878 - mean_q: 21.018 Interval 2044 (1021500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2955 Interval 2045 (1022000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9060 2 episodes - episode_reward: -292.831 [-329.984, -255.678] - loss: 43.812 - mae: 42.964 - mean_q: 20.601 Interval 2046 (1022500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1615 Interval 2047 (1023000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1863 Interval 2048 (1023500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.8990 1 episodes - episode_reward: -650.540 [-650.540, -650.540] - loss: 39.228 - mae: 43.945 - mean_q: 24.122 Interval 2049 (1024000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0607 3 episodes - episode_reward: -181.590 [-214.447, -154.676] - loss: 32.811 - mae: 43.733 - mean_q: 25.816 Interval 2050 (1024500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5721 1 episodes - episode_reward: -237.304 [-237.304, -237.304] - loss: 39.546 - mae: 43.722 - mean_q: 25.416 Interval 2051 (1025000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2662 1 episodes - episode_reward: -201.075 [-201.075, -201.075] - loss: 40.126 - mae: 44.034 - mean_q: 24.527 Interval 2052 (1025500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0916 2 episodes - episode_reward: -237.991 [-251.895, -224.086] - loss: 43.791 - mae: 44.649 - mean_q: 26.256 Interval 2053 (1026000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6799 3 episodes - episode_reward: -265.044 [-307.966, -216.433] - loss: 37.753 - mae: 44.082 - mean_q: 28.610 Interval 2054 
(1026500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8188 2 episodes - episode_reward: -241.744 [-278.049, -205.439] - loss: 34.492 - mae: 43.605 - mean_q: 29.917 Interval 2055 (1027000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8477 2 episodes - episode_reward: -209.814 [-245.513, -174.114] - loss: 37.758 - mae: 43.765 - mean_q: 30.831 Interval 2056 (1027500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0366 3 episodes - episode_reward: -193.382 [-239.340, -146.590] - loss: 38.688 - mae: 43.895 - mean_q: 30.803 Interval 2057 (1028000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2926 3 episodes - episode_reward: -170.408 [-202.060, -131.001] - loss: 38.578 - mae: 44.356 - mean_q: 31.254 Interval 2058 (1028500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9943 2 episodes - episode_reward: -251.423 [-281.622, -221.225] - loss: 33.241 - mae: 43.862 - mean_q: 30.875 Interval 2059 (1029000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -1.4185 4 episodes - episode_reward: -213.970 [-380.435, -108.582] - loss: 36.789 - mae: 44.120 - mean_q: 31.243 Interval 2060 (1029500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.9087 2 episodes - episode_reward: -205.185 [-211.228, -199.143] - loss: 38.112 - mae: 43.860 - mean_q: 31.696 Interval 2061 (1030000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0847 3 episodes - episode_reward: -195.116 [-249.445, -165.576] - loss: 37.501 - mae: 44.529 - mean_q: 32.012 Interval 2062 (1030500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2628 3 episodes - episode_reward: -155.617 [-203.934, -126.073] - loss: 39.357 - mae: 46.128 - mean_q: 33.643 Interval 2063 (1031000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -1.5344 3 episodes - episode_reward: -207.335 [-323.278, -101.341] - loss: 41.118 - mae: 46.724 - mean_q: 33.954 Interval 2064 (1031500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9323 3 episodes - episode_reward: -211.268 [-365.074, -76.846] - loss: 37.609 - mae: 47.062 - mean_q: 35.033 Interval 2065 (1032000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3629 1 episodes - episode_reward: -232.159 [-232.159, -232.159] - loss: 42.028 - mae: 48.153 - mean_q: 37.904 Interval 2066 (1032500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.6526 5 episodes - episode_reward: -274.663 [-413.159, -152.406] - loss: 41.671 - mae: 48.635 - mean_q: 37.913 Interval 2067 (1033000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7384 1 episodes - episode_reward: -254.522 [-254.522, -254.522] - loss: 43.380 - mae: 48.820 - mean_q: 38.252 Interval 2068 (1033500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3363 1 episodes - episode_reward: -329.017 [-329.017, -329.017] - loss: 41.850 - mae: 49.532 - mean_q: 38.939 Interval 2069 (1034000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4635 4 episodes - episode_reward: -284.687 [-598.942, -49.699] - loss: 32.345 - mae: 50.674 - mean_q: 40.189 Interval 2070 (1034500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6927 1 episodes - episode_reward: -249.024 [-249.024, -249.024] - loss: 35.832 - mae: 50.989 - mean_q: 39.185 Interval 2071 (1035000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.8734 6 episodes - episode_reward: -263.798 [-430.023, -103.584] - loss: 39.395 - mae: 51.788 - mean_q: 41.844 Interval 2072 (1035500 steps performed) 500/500 [==============================] - 4s 
8ms/step - reward: -0.3124 Interval 2073 (1036000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8298 2 episodes - episode_reward: -246.816 [-408.087, -85.545] - loss: 34.606 - mae: 52.338 - mean_q: 42.275 Interval 2074 (1036500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0370 1 episodes - episode_reward: -46.130 [-46.130, -46.130] - loss: 38.153 - mae: 51.348 - mean_q: 40.696 Interval 2075 (1037000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0676 3 episodes - episode_reward: -357.019 [-592.568, -185.210] - loss: 36.778 - mae: 51.333 - mean_q: 39.270 Interval 2076 (1037500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0285 2 episodes - episode_reward: -258.470 [-416.941, -100.000] - loss: 32.636 - mae: 50.819 - mean_q: 39.959 Interval 2077 (1038000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9320 2 episodes - episode_reward: -204.059 [-275.992, -132.127] - loss: 38.190 - mae: 50.765 - mean_q: 40.906 Interval 2078 (1038500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7921 3 episodes - episode_reward: -322.272 [-759.335, -100.000] - loss: 32.691 - mae: 49.874 - mean_q: 39.475 Interval 2079 (1039000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5906 2 episodes - episode_reward: -157.991 [-204.158, -111.824] - loss: 38.914 - mae: 50.699 - mean_q: 39.543 Interval 2080 (1039500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2328 2 episodes - episode_reward: -295.121 [-468.868, -121.375] - loss: 31.127 - mae: 50.672 - mean_q: 39.923 Interval 2081 (1040000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7115 2 episodes - episode_reward: -124.749 [-176.256, -73.242] - loss: 32.409 - mae: 51.221 - mean_q: 40.490 Interval 2082 
(1040500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8142 1 episodes - episode_reward: -348.679 [-348.679, -348.679] - loss: 41.133 - mae: 50.791 - mean_q: 39.886 Interval 2083 (1041000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8772 2 episodes - episode_reward: -296.950 [-488.589, -105.311] - loss: 36.302 - mae: 50.980 - mean_q: 41.696 Interval 2084 (1041500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3055 3 episodes - episode_reward: -214.596 [-334.225, -99.217] - loss: 38.115 - mae: 49.584 - mean_q: 41.741 Interval 2085 (1042000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0443 3 episodes - episode_reward: -166.649 [-337.195, -58.576] - loss: 32.838 - mae: 49.842 - mean_q: 42.242 Interval 2086 (1042500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4313 1 episodes - episode_reward: -236.916 [-236.916, -236.916] - loss: 35.185 - mae: 49.328 - mean_q: 43.020 Interval 2087 (1043000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2189 2 episodes - episode_reward: -311.605 [-320.510, -302.700] - loss: 37.940 - mae: 49.342 - mean_q: 43.624 Interval 2088 (1043500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8224 2 episodes - episode_reward: -192.414 [-268.042, -116.786] - loss: 37.758 - mae: 49.862 - mean_q: 44.813 Interval 2089 (1044000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2492 2 episodes - episode_reward: -295.577 [-373.496, -217.658] - loss: 40.328 - mae: 50.484 - mean_q: 45.030 Interval 2090 (1044500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4300 3 episodes - episode_reward: -256.515 [-324.343, -193.659] - loss: 39.795 - mae: 50.041 - mean_q: 45.027 Interval 2091 (1045000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -0.6959 2 episodes - episode_reward: -180.713 [-247.788, -113.638] - loss: 37.891 - mae: 51.417 - mean_q: 48.210 Interval 2092 (1045500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4680 3 episodes - episode_reward: -255.121 [-523.120, -112.043] - loss: 45.949 - mae: 52.149 - mean_q: 49.848 Interval 2093 (1046000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8218 3 episodes - episode_reward: -77.017 [-128.086, -2.966] - loss: 48.586 - mae: 53.042 - mean_q: 51.592 Interval 2094 (1046500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3378 2 episodes - episode_reward: -366.030 [-388.909, -343.151] - loss: 39.988 - mae: 53.736 - mean_q: 52.159 Interval 2095 (1047000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6158 4 episodes - episode_reward: -220.717 [-392.576, -100.000] - loss: 32.725 - mae: 55.096 - mean_q: 54.429 Interval 2096 (1047500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0656 3 episodes - episode_reward: -343.650 [-732.010, -144.313] - loss: 39.853 - mae: 56.914 - mean_q: 56.656 Interval 2097 (1048000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2812 2 episodes - episode_reward: -256.477 [-289.140, -223.815] - loss: 53.387 - mae: 57.325 - mean_q: 57.952 Interval 2098 (1048500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0160 1 episodes - episode_reward: -351.535 [-351.535, -351.535] - loss: 44.201 - mae: 58.028 - mean_q: 58.818 Interval 2099 (1049000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4542 1 episodes - episode_reward: -348.913 [-348.913, -348.913] - loss: 35.587 - mae: 59.318 - mean_q: 59.133 Interval 2100 (1049500 steps performed) 500/500 [==============================] - 4s 
7ms/step - reward: -0.9493 2 episodes - episode_reward: -167.987 [-235.975, -100.000] - loss: 66.515 - mae: 59.920 - mean_q: 59.997 Interval 2101 (1050000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9176 2 episodes - episode_reward: -260.337 [-340.390, -180.284] - loss: 36.709 - mae: 59.545 - mean_q: 59.333 Interval 2102 (1050500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1593 3 episodes - episode_reward: -255.706 [-328.179, -122.338] - loss: 41.017 - mae: 59.556 - mean_q: 58.614 Interval 2103 (1051000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4696 1 episodes - episode_reward: -292.479 [-292.479, -292.479] - loss: 40.185 - mae: 60.137 - mean_q: 59.504 Interval 2104 (1051500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0778 2 episodes - episode_reward: -169.391 [-227.846, -110.937] - loss: 36.320 - mae: 59.667 - mean_q: 58.852 Interval 2105 (1052000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5909 1 episodes - episode_reward: -338.734 [-338.734, -338.734] - loss: 35.707 - mae: 59.814 - mean_q: 58.437 Interval 2106 (1052500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.5884 3 episodes - episode_reward: -401.237 [-617.414, -214.289] - loss: 38.087 - mae: 59.358 - mean_q: 56.766 Interval 2107 (1053000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9204 1 episodes - episode_reward: -509.136 [-509.136, -509.136] - loss: 47.984 - mae: 59.263 - mean_q: 56.259 Interval 2108 (1053500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3834 2 episodes - episode_reward: -422.468 [-581.939, -262.997] - loss: 40.357 - mae: 58.464 - mean_q: 57.374 Interval 2109 (1054000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2694 Interval 
2110 (1054500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9362 1 episodes - episode_reward: -396.066 [-396.066, -396.066] - loss: 39.710 - mae: 59.686 - mean_q: 58.653 Interval 2111 (1055000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0209 4 episodes - episode_reward: -282.606 [-480.242, -135.062] - loss: 48.781 - mae: 59.392 - mean_q: 59.507 Interval 2112 (1055500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4494 3 episodes - episode_reward: -449.048 [-712.162, -306.622] - loss: 38.830 - mae: 59.137 - mean_q: 58.813 Interval 2113 (1056000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4765 3 episodes - episode_reward: -177.501 [-220.706, -128.501] - loss: 39.391 - mae: 58.393 - mean_q: 58.100 Interval 2114 (1056500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3532 7 episodes - episode_reward: -199.356 [-643.241, -97.287] - loss: 42.042 - mae: 57.420 - mean_q: 57.481 Interval 2115 (1057000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3106 4 episodes - episode_reward: -175.361 [-245.962, -112.056] - loss: 41.519 - mae: 58.119 - mean_q: 58.038 Interval 2116 (1057500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3727 2 episodes - episode_reward: -76.835 [-89.153, -64.517] - loss: 38.945 - mae: 57.376 - mean_q: 55.483 Interval 2117 (1058000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7515 4 episodes - episode_reward: -77.966 [-100.000, -49.682] - loss: 42.494 - mae: 57.828 - mean_q: 56.220 Interval 2118 (1058500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8023 3 episodes - episode_reward: -250.383 [-286.314, -214.823] - loss: 39.057 - mae: 58.108 - mean_q: 54.753 Interval 2119 (1059000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -1.4342 4 episodes - episode_reward: -228.601 [-503.922, -54.771] - loss: 41.266 - mae: 57.984 - mean_q: 54.471 Interval 2120 (1059500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8437 2 episodes - episode_reward: -212.469 [-315.447, -109.490] - loss: 40.112 - mae: 57.751 - mean_q: 53.867 Interval 2121 (1060000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9122 1 episodes - episode_reward: -366.912 [-366.912, -366.912] - loss: 35.030 - mae: 57.227 - mean_q: 52.155 Interval 2122 (1060500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1547 4 episodes - episode_reward: -266.731 [-482.408, -63.788] - loss: 57.765 - mae: 56.967 - mean_q: 52.326 Interval 2123 (1061000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5337 3 episodes - episode_reward: -140.852 [-255.760, -16.698] - loss: 42.475 - mae: 56.591 - mean_q: 52.489 Interval 2124 (1061500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7855 2 episodes - episode_reward: -149.256 [-274.940, -23.572] - loss: 37.232 - mae: 56.142 - mean_q: 51.328 Interval 2125 (1062000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.1734 6 episodes - episode_reward: -266.190 [-689.390, -42.719] - loss: 76.048 - mae: 56.537 - mean_q: 50.916 Interval 2126 (1062500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0459 4 episodes - episode_reward: -255.655 [-416.883, -76.097] - loss: 48.269 - mae: 55.934 - mean_q: 49.846 Interval 2127 (1063000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0264 1 episodes - episode_reward: -78.688 [-78.688, -78.688] - loss: 49.518 - mae: 56.789 - mean_q: 50.943 Interval 2128 (1063500 steps performed) 500/500 [==============================] - 4s 7ms/step 
- reward: -1.1090 5 episodes - episode_reward: -104.137 [-152.913, 13.235] - loss: 45.851 - mae: 56.960 - mean_q: 50.561 Interval 2129 (1064000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0881 4 episodes - episode_reward: -134.738 [-221.052, 28.211] - loss: 50.659 - mae: 57.377 - mean_q: 52.067 Interval 2130 (1064500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0469 3 episodes - episode_reward: -334.861 [-478.044, -82.689] - loss: 40.156 - mae: 58.005 - mean_q: 52.545 Interval 2131 (1065000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0052 2 episodes - episode_reward: -18.126 [-21.113, -15.140] - loss: 42.120 - mae: 59.722 - mean_q: 53.146 Interval 2132 (1065500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1933 2 episodes - episode_reward: -62.972 [-73.473, -52.471] - loss: 42.123 - mae: 60.152 - mean_q: 54.283 Interval 2133 (1066000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7725 2 episodes - episode_reward: -379.490 [-666.301, -92.678] - loss: 55.850 - mae: 62.801 - mean_q: 57.463 Interval 2134 (1066500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4189 4 episodes - episode_reward: -76.393 [-157.336, -30.957] - loss: 43.169 - mae: 63.095 - mean_q: 60.520 Interval 2135 (1067000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1968 1 episodes - episode_reward: -75.931 [-75.931, -75.931] - loss: 56.675 - mae: 64.776 - mean_q: 63.796 Interval 2136 (1067500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1300 3 episodes - episode_reward: -221.580 [-485.966, -33.666] - loss: 46.753 - mae: 66.603 - mean_q: 66.817 Interval 2137 (1068000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6829 4 episodes - episode_reward: -183.272 
[-378.381, 4.882] - loss: 47.855 - mae: 69.055 - mean_q: 68.015 Interval 2138 (1068500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9441 4 episodes - episode_reward: -369.991 [-569.558, -123.063] - loss: 49.259 - mae: 70.864 - mean_q: 68.068 Interval 2139 (1069000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3053 1 episodes - episode_reward: -73.759 [-73.759, -73.759] - loss: 53.088 - mae: 72.241 - mean_q: 70.097 Interval 2140 (1069500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2898 7 episodes - episode_reward: -165.370 [-285.637, -88.979] - loss: 54.271 - mae: 71.721 - mean_q: 71.204 Interval 2141 (1070000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4294 1 episodes - episode_reward: -215.368 [-215.368, -215.368] - loss: 47.973 - mae: 72.335 - mean_q: 71.400 Interval 2142 (1070500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7874 1 episodes - episode_reward: -439.834 [-439.834, -439.834] - loss: 47.769 - mae: 73.428 - mean_q: 72.687 Interval 2143 (1071000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2766 2 episodes - episode_reward: -330.485 [-508.593, -152.377] - loss: 46.568 - mae: 73.494 - mean_q: 72.657 Interval 2144 (1071500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1046 2 episodes - episode_reward: -8.433 [-23.458, 6.592] - loss: 46.055 - mae: 74.419 - mean_q: 74.555 Interval 2145 (1072000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2007 3 episodes - episode_reward: -167.995 [-185.405, -145.292] - loss: 43.811 - mae: 73.997 - mean_q: 72.687 Interval 2146 (1072500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4869 1 episodes - episode_reward: -289.654 [-289.654, -289.654] - loss: 43.319 - mae: 72.624 
- mean_q: 70.488 Interval 2147 (1073000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9751 4 episodes - episode_reward: -142.245 [-262.585, -41.824] - loss: 38.802 - mae: 72.231 - mean_q: 69.036 Interval 2148 (1073500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.7221 7 episodes - episode_reward: -187.059 [-468.059, -86.660] - loss: 47.108 - mae: 72.056 - mean_q: 69.178 Interval 2149 (1074000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5501 3 episodes - episode_reward: -284.051 [-392.659, -208.267] - loss: 41.849 - mae: 70.991 - mean_q: 66.987 Interval 2150 (1074500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3481 4 episodes - episode_reward: -165.786 [-272.427, -53.485] - loss: 40.789 - mae: 69.624 - mean_q: 65.060 Interval 2151 (1075000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7510 1 episodes - episode_reward: -294.531 [-294.531, -294.531] - loss: 45.636 - mae: 70.045 - mean_q: 64.192 Interval 2152 (1075500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7942 4 episodes - episode_reward: -209.432 [-369.537, -4.535] - loss: 47.625 - mae: 69.376 - mean_q: 63.076 Interval 2153 (1076000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2516 Interval 2154 (1076500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -1.5204 Interval 2155 (1077000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1320 3 episodes - episode_reward: -510.566 [-1061.518, -27.403] - loss: 41.161 - mae: 71.813 - mean_q: 67.841 Interval 2156 (1077500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3000 3 episodes - episode_reward: -248.031 [-450.270, -138.782] - loss: 49.711 - mae: 71.457 - mean_q: 68.912 Interval 2157 
(1078000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3893 1 episodes - episode_reward: 28.454 [28.454, 28.454] - loss: 40.372 - mae: 71.622 - mean_q: 69.711 Interval 2158 (1078500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2214 Interval 2159 (1079000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1311 Interval 2160 (1079500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.7252 2 episodes - episode_reward: -357.596 [-603.459, -111.733] - loss: 42.564 - mae: 73.518 - mean_q: 74.574 Interval 2161 (1080000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3532 1 episodes - episode_reward: -569.555 [-569.555, -569.555] - loss: 48.553 - mae: 74.514 - mean_q: 77.430 Interval 2162 (1080500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6426 Interval 2163 (1081000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.3804 Interval 2164 (1081500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0112 Interval 2165 (1082000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.6350 2 episodes - episode_reward: -397.940 [-674.810, -121.070] - loss: 47.790 - mae: 82.362 - mean_q: 90.755 Interval 2166 (1082500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0900 Interval 2167 (1083000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.3811 Interval 2168 (1083500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0738 1 episodes - episode_reward: -424.068 [-424.068, -424.068] - loss: 40.476 - mae: 87.423 - mean_q: 100.794 Interval 2169 (1084000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2992 Interval 2170 
(1084500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.3975 Interval 2171 (1085000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1879 Interval 2172 (1085500 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.1160 Interval 2173 (1086000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.0882 Interval 2174 (1086500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.3449 Interval 2175 (1087000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.0484 Interval 2176 (1087500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.3101 1 episodes - episode_reward: -893.900 [-893.900, -893.900] - loss: 43.631 - mae: 91.034 - mean_q: 103.568 Interval 2177 (1088000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1139 5 episodes - episode_reward: -175.904 [-368.517, -54.403] - loss: 40.293 - mae: 89.778 - mean_q: 102.158 Interval 2178 (1088500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1763 Interval 2179 (1089000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6784 2 episodes - episode_reward: -484.997 [-885.110, -84.884] - loss: 39.064 - mae: 91.034 - mean_q: 103.050 Interval 2180 (1089500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0771 Interval 2181 (1090000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1217 Interval 2182 (1090500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0559 Interval 2183 (1091000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.8100 1 episodes - episode_reward: -562.870 [-562.870, -562.870] - loss: 34.623 - mae: 91.583 - mean_q: 103.803 
Interval 2184 (1091500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7574 2 episodes - episode_reward: -157.140 [-190.744, -123.536] - loss: 39.983 - mae: 91.899 - mean_q: 105.046 Interval 2185 (1092000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1119 Interval 2186 (1092500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2007 Interval 2187 (1093000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0269 Interval 2188 (1093500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2302 Interval 2189 (1094000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1525 Interval 2190 (1094500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.6374 1 episodes - episode_reward: -739.662 [-739.662, -739.662] - loss: 31.788 - mae: 90.256 - mean_q: 105.125 Interval 2191 (1095000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2872 1 episodes - episode_reward: -236.886 [-236.886, -236.886] - loss: 34.596 - mae: 88.988 - mean_q: 105.248 Interval 2192 (1095500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3794 Interval 2193 (1096000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0843 Interval 2194 (1096500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1419 Interval 2195 (1097000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2972 Interval 2196 (1097500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2811 1 episodes - episode_reward: -496.595 [-496.595, -496.595] - loss: 35.439 - mae: 91.709 - mean_q: 112.250 Interval 2197 (1098000 steps performed) 500/500 [==============================] - 5s 10ms/step - 
reward: -0.1565 Interval 2198 (1098500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2425 1 episodes - episode_reward: -184.543 [-184.543, -184.543] - loss: 34.785 - mae: 91.926 - mean_q: 113.865 Interval 2199 (1099000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0302 Interval 2200 (1099500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1570 Interval 2201 (1100000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1868 Interval 2202 (1100500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2364 Interval 2203 (1101000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1172 Interval 2204 (1101500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1689 Interval 2205 (1102000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2578 Interval 2206 (1102500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.1725 Interval 2207 (1103000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.5482 4 episodes - episode_reward: -361.993 [-977.166, -100.000] - loss: 36.253 - mae: 87.115 - mean_q: 107.309 Interval 2208 (1103500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6499 1 episodes - episode_reward: -287.228 [-287.228, -287.228] - loss: 28.071 - mae: 86.989 - mean_q: 107.489 Interval 2209 (1104000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2636 1 episodes - episode_reward: -740.030 [-740.030, -740.030] - loss: 35.984 - mae: 85.942 - mean_q: 104.480 Interval 2210 (1104500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7286 1 episodes - episode_reward: -71.994 [-71.994, -71.994] - loss: 30.760 - mae: 85.242 
- mean_q: 102.220 Interval 2211 (1105000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5912 1 episodes - episode_reward: -357.032 [-357.032, -357.032] - loss: 30.407 - mae: 84.304 - mean_q: 100.933 Interval 2212 (1105500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2691 3 episodes - episode_reward: -263.531 [-351.977, -138.652] - loss: 32.587 - mae: 83.830 - mean_q: 99.960 Interval 2213 (1106000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5427 2 episodes - episode_reward: -163.781 [-179.330, -148.232] - loss: 31.892 - mae: 83.102 - mean_q: 98.630 Interval 2214 (1106500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0592 Interval 2215 (1107000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6898 2 episodes - episode_reward: -129.734 [-141.259, -118.208] - loss: 23.986 - mae: 80.430 - mean_q: 94.796 Interval 2216 (1107500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.8595 5 episodes - episode_reward: -301.868 [-620.132, -132.339] - loss: 30.447 - mae: 78.877 - mean_q: 91.939 Interval 2217 (1108000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8178 2 episodes - episode_reward: -167.028 [-174.591, -159.464] - loss: 25.253 - mae: 77.396 - mean_q: 90.158 Interval 2218 (1108500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8998 1 episodes - episode_reward: -353.536 [-353.536, -353.536] - loss: 28.130 - mae: 76.579 - mean_q: 88.469 Interval 2219 (1109000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2661 2 episodes - episode_reward: -391.602 [-399.802, -383.401] - loss: 24.703 - mae: 75.983 - mean_q: 87.222 Interval 2220 (1109500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5609 3 episodes 
- episode_reward: -255.198 [-365.358, -137.388] - loss: 28.799 - mae: 75.434 - mean_q: 85.568 Interval 2221 (1110000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0055 2 episodes - episode_reward: -300.739 [-316.537, -284.942] - loss: 28.402 - mae: 75.191 - mean_q: 84.927 Interval 2222 (1110500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2320 Interval 2223 (1111000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.3575 1 episodes - episode_reward: -300.749 [-300.749, -300.749] - loss: 34.060 - mae: 72.964 - mean_q: 82.177 Interval 2224 (1111500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1194 Interval 2225 (1112000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1325 Interval 2226 (1112500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2143 Interval 2227 (1113000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.3283 Interval 2228 (1113500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1786 5 episodes - episode_reward: -176.361 [-393.552, -81.107] - loss: 30.367 - mae: 68.765 - mean_q: 79.133 Interval 2229 (1114000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3008 1 episodes - episode_reward: -127.685 [-127.685, -127.685] - loss: 23.919 - mae: 69.049 - mean_q: 80.101 Interval 2230 (1114500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8706 2 episodes - episode_reward: -230.548 [-337.529, -123.568] - loss: 20.849 - mae: 68.288 - mean_q: 78.355 Interval 2231 (1115000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1658 Interval 2232 (1115500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7628 2 episodes - 
episode_reward: -284.477 [-334.587, -234.367] - loss: 20.374 - mae: 68.461 - mean_q: 79.971 Interval 2233 (1116000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1650 Interval 2234 (1116500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2634 Interval 2235 (1117000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3423 1 episodes - episode_reward: -225.866 [-225.866, -225.866] - loss: 23.086 - mae: 68.536 - mean_q: 82.298 Interval 2236 (1117500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6357 2 episodes - episode_reward: -189.825 [-234.933, -144.716] - loss: 20.555 - mae: 68.303 - mean_q: 82.295 Interval 2237 (1118000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3722 1 episodes - episode_reward: -238.029 [-238.029, -238.029] - loss: 36.315 - mae: 67.525 - mean_q: 81.947 Interval 2238 (1118500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4371 1 episodes - episode_reward: -196.045 [-196.045, -196.045] - loss: 19.293 - mae: 66.213 - mean_q: 80.850 Interval 2239 (1119000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1493 Interval 2240 (1119500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1642 2 episodes - episode_reward: -329.131 [-418.024, -240.237] - loss: 17.097 - mae: 65.476 - mean_q: 80.548 Interval 2241 (1120000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2078 Interval 2242 (1120500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2686 1 episodes - episode_reward: -285.363 [-285.363, -285.363] - loss: 24.695 - mae: 64.127 - mean_q: 79.478 Interval 2243 (1121000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8224 2 episodes - episode_reward: 
-153.079 [-174.351, -131.807] - loss: 14.367 - mae: 63.391 - mean_q: 78.568 Interval 2244 (1121500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.8285 Interval 2245 (1122000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4122 2 episodes - episode_reward: -353.939 [-577.435, -130.442] - loss: 14.081 - mae: 61.611 - mean_q: 75.751 Interval 2246 (1122500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.5864 1 episodes - episode_reward: -165.985 [-165.985, -165.985] - loss: 20.970 - mae: 61.049 - mean_q: 74.163 Interval 2247 (1123000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5831 5 episodes - episode_reward: -183.450 [-266.067, -119.470] - loss: 18.071 - mae: 59.613 - mean_q: 71.605 Interval 2248 (1123500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2893 3 episodes - episode_reward: -182.374 [-253.576, -100.000] - loss: 15.916 - mae: 58.621 - mean_q: 70.436 Interval 2249 (1124000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.8316 2 episodes - episode_reward: -209.679 [-230.181, -189.178] - loss: 15.672 - mae: 57.929 - mean_q: 69.125 Interval 2250 (1124500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.7335 2 episodes - episode_reward: -239.595 [-242.292, -236.899] - loss: 15.767 - mae: 57.284 - mean_q: 68.224 Interval 2251 (1125000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1881 Interval 2252 (1125500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -1.2696 2 episodes - episode_reward: -342.797 [-354.123, -331.471] - loss: 13.091 - mae: 54.790 - mean_q: 65.100 Interval 2253 (1126000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.0140 2 episodes - episode_reward: -200.800 [-225.055, 
-176.544] - loss: 13.676 - mae: 53.521 - mean_q: 62.506 Interval 2254 (1126500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8169 2 episodes - episode_reward: -245.110 [-259.631, -230.589] - loss: 15.840 - mae: 52.425 - mean_q: 60.632 Interval 2255 (1127000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8910 3 episodes - episode_reward: -153.497 [-225.720, -108.127] - loss: 15.473 - mae: 50.767 - mean_q: 58.565 Interval 2256 (1127500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9422 3 episodes - episode_reward: -131.666 [-187.133, -24.697] - loss: 14.291 - mae: 50.840 - mean_q: 58.043 Interval 2257 (1128000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8188 3 episodes - episode_reward: -143.686 [-184.239, -86.689] - loss: 13.260 - mae: 50.289 - mean_q: 57.847 Interval 2258 (1128500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0766 3 episodes - episode_reward: -203.936 [-266.795, -104.954] - loss: 13.925 - mae: 48.437 - mean_q: 54.933 Interval 2259 (1129000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5726 4 episodes - episode_reward: -169.284 [-206.216, -119.798] - loss: 14.946 - mae: 47.658 - mean_q: 53.435 Interval 2260 (1129500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7324 2 episodes - episode_reward: -184.729 [-251.641, -117.816] - loss: 13.464 - mae: 46.609 - mean_q: 52.055 Interval 2261 (1130000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5655 2 episodes - episode_reward: -193.168 [-280.164, -106.171] - loss: 14.828 - mae: 46.041 - mean_q: 50.304 Interval 2262 (1130500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4167 Interval 2263 (1131000 steps performed) 500/500 [==============================] - 3s 
7ms/step - reward: -0.8292 3 episodes - episode_reward: -220.165 [-315.787, -156.668] - loss: 14.977 - mae: 44.544 - mean_q: 48.565 Interval 2264 (1131500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3247 1 episodes - episode_reward: -123.268 [-123.268, -123.268] - loss: 14.589 - mae: 44.199 - mean_q: 47.750 Interval 2265 (1132000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7212 1 episodes - episode_reward: -367.594 [-367.594, -367.594] - loss: 18.724 - mae: 43.477 - mean_q: 46.646 Interval 2266 (1132500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9246 4 episodes - episode_reward: -110.249 [-169.802, -33.164] - loss: 15.314 - mae: 42.840 - mean_q: 46.432 Interval 2267 (1133000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6972 2 episodes - episode_reward: -159.444 [-160.873, -158.014] - loss: 14.603 - mae: 42.235 - mean_q: 45.725 Interval 2268 (1133500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3721 1 episodes - episode_reward: -250.612 [-250.612, -250.612] - loss: 15.267 - mae: 42.026 - mean_q: 45.356 Interval 2269 (1134000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2339 1 episodes - episode_reward: -582.538 [-582.538, -582.538] - loss: 17.143 - mae: 42.649 - mean_q: 46.080 Interval 2270 (1134500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1395 Interval 2271 (1135000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0238 2 episodes - episode_reward: -94.530 [-152.901, -36.159] - loss: 21.564 - mae: 42.793 - mean_q: 46.291 Interval 2272 (1135500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2157 1 episodes - episode_reward: -73.443 [-73.443, -73.443] - loss: 15.351 - mae: 43.147 - mean_q: 47.991 Interval 2273 
(1136000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2316 Interval 2274 (1136500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1632 Interval 2275 (1137000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2363 Interval 2276 (1137500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0561 Interval 2277 (1138000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2633 Interval 2278 (1138500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1647 Interval 2279 (1139000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.2193 Interval 2280 (1139500 steps performed) 500/500 [==============================] - 9s 19ms/step - reward: -0.1053 Interval 2281 (1140000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1624 Interval 2282 (1140500 steps performed) 500/500 [==============================] - 9s 17ms/step - reward: -0.4348 1 episodes - episode_reward: -956.606 [-956.606, -956.606] - loss: 16.684 - mae: 47.714 - mean_q: 53.927 Interval 2283 (1141000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2490 Interval 2284 (1141500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5982 1 episodes - episode_reward: -324.885 [-324.885, -324.885] - loss: 13.148 - mae: 46.322 - mean_q: 52.364 Interval 2285 (1142000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2824 1 episodes - episode_reward: -206.776 [-206.776, -206.776] - loss: 15.694 - mae: 46.089 - mean_q: 52.353 Interval 2286 (1142500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1791 Interval 2287 (1143000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 
-0.1912 Interval 2288 (1143500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2171 1 episodes - episode_reward: -314.106 [-314.106, -314.106] - loss: 13.859 - mae: 45.115 - mean_q: 51.334 Interval 2289 (1144000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1983 1 episodes - episode_reward: -140.574 [-140.574, -140.574] - loss: 12.741 - mae: 44.914 - mean_q: 50.638 Interval 2290 (1144500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.5776 1 episodes - episode_reward: -241.831 [-241.831, -241.831] - loss: 13.275 - mae: 44.608 - mean_q: 50.144 Interval 2291 (1145000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8510 4 episodes - episode_reward: -108.955 [-120.595, -99.790] - loss: 14.819 - mae: 44.639 - mean_q: 50.765 Interval 2292 (1145500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5606 2 episodes - episode_reward: -126.430 [-161.000, -91.860] - loss: 15.962 - mae: 44.358 - mean_q: 50.931 Interval 2293 (1146000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8396 4 episodes - episode_reward: -115.576 [-227.585, -33.866] - loss: 16.537 - mae: 43.658 - mean_q: 50.284 Interval 2294 (1146500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.0122 Interval 2295 (1147000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.5217 1 episodes - episode_reward: -215.689 [-215.689, -215.689] - loss: 13.869 - mae: 43.508 - mean_q: 49.977 Interval 2296 (1147500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7601 1 episodes - episode_reward: -194.651 [-194.651, -194.651] - loss: 19.321 - mae: 42.658 - mean_q: 48.426 Interval 2297 (1148000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4708 2 episodes - 
episode_reward: -260.761 [-412.045, -109.478] - loss: 16.503 - mae: 42.380 - mean_q: 47.984 Interval 2298 (1148500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7367 2 episodes - episode_reward: -160.471 [-254.333, -66.609] - loss: 17.573 - mae: 42.069 - mean_q: 47.358 Interval 2299 (1149000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1400 Interval 2300 (1149500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7013 3 episodes - episode_reward: -162.746 [-233.151, -104.235] - loss: 15.862 - mae: 41.828 - mean_q: 46.699 Interval 2301 (1150000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.2052 1 episodes - episode_reward: 187.861 [187.861, 187.861] - loss: 14.655 - mae: 41.905 - mean_q: 47.058 Interval 2302 (1150500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0155 Interval 2303 (1151000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.5466 1 episodes - episode_reward: -253.359 [-253.359, -253.359] - loss: 14.115 - mae: 42.438 - mean_q: 47.859 Interval 2304 (1151500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0051 Interval 2305 (1152000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1434 Interval 2306 (1152500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.4266 Interval 2307 (1153000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.3987 1 episodes - episode_reward: -437.652 [-437.652, -437.652] - loss: 13.549 - mae: 41.011 - mean_q: 46.195 Interval 2308 (1153500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1209 Interval 2309 (1154000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3920 1 episodes - episode_reward: 
-271.063 [-271.063, -271.063] - loss: 19.295 - mae: 40.644 - mean_q: 46.369 Interval 2310 (1154500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4656 1 episodes - episode_reward: -281.438 [-281.438, -281.438] - loss: 16.431 - mae: 40.390 - mean_q: 46.359 Interval 2311 (1155000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1091 Interval 2312 (1155500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2444 Interval 2313 (1156000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6786 2 episodes - episode_reward: -246.259 [-407.905, -84.614] - loss: 16.405 - mae: 40.146 - mean_q: 45.971 Interval 2314 (1156500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1913 Interval 2315 (1157000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9184 2 episodes - episode_reward: -274.051 [-416.762, -131.339] - loss: 14.448 - mae: 39.839 - mean_q: 44.994 Interval 2316 (1157500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1145 1 episodes - episode_reward: -626.564 [-626.564, -626.564] - loss: 16.185 - mae: 39.586 - mean_q: 44.632 Interval 2317 (1158000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5084 2 episodes - episode_reward: -127.610 [-215.742, -39.478] - loss: 16.957 - mae: 39.624 - mean_q: 44.078 Interval 2318 (1158500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2149 2 episodes - episode_reward: -180.947 [-254.488, -107.407] - loss: 15.646 - mae: 39.204 - mean_q: 44.343 Interval 2319 (1159000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9706 1 episodes - episode_reward: -509.411 [-509.411, -509.411] - loss: 17.510 - mae: 38.865 - mean_q: 43.904 Interval 2320 (1159500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -1.3667 2 episodes - episode_reward: -444.142 [-694.620, -193.664] - loss: 16.843 - mae: 38.902 - mean_q: 43.532 Interval 2321 (1160000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7586 2 episodes - episode_reward: -167.765 [-300.933, -34.597] - loss: 16.917 - mae: 39.239 - mean_q: 44.667 Interval 2322 (1160500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0054 Interval 2323 (1161000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5162 1 episodes - episode_reward: -196.350 [-196.350, -196.350] - loss: 14.711 - mae: 40.120 - mean_q: 46.217 Interval 2324 (1161500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0330 Interval 2325 (1162000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2378 Interval 2326 (1162500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.9837 1 episodes - episode_reward: -503.414 [-503.414, -503.414] - loss: 19.689 - mae: 40.963 - mean_q: 46.562 Interval 2327 (1163000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5188 1 episodes - episode_reward: -374.548 [-374.548, -374.548] - loss: 16.822 - mae: 40.405 - mean_q: 45.024 Interval 2328 (1163500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.9350 2 episodes - episode_reward: -272.024 [-440.055, -103.993] - loss: 16.199 - mae: 41.017 - mean_q: 46.147 Interval 2329 (1164000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3370 Interval 2330 (1164500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2908 Interval 2331 (1165000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1855 Interval 2332 (1165500 steps performed) 500/500 
[==============================] - 5s 11ms/step - reward: -0.1734 Interval 2333 (1166000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2832 1 episodes - episode_reward: -622.436 [-622.436, -622.436] - loss: 16.672 - mae: 42.183 - mean_q: 49.908 Interval 2334 (1166500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2749 Interval 2335 (1167000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.5202 Interval 2336 (1167500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6566 2 episodes - episode_reward: -355.341 [-595.161, -115.521] - loss: 19.499 - mae: 43.354 - mean_q: 50.980 Interval 2337 (1168000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1651 Interval 2338 (1168500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.6019 Interval 2339 (1169000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6956 4 episodes - episode_reward: -314.473 [-514.325, -167.310] - loss: 16.429 - mae: 43.854 - mean_q: 51.390 Interval 2340 (1169500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6230 2 episodes - episode_reward: -193.100 [-221.184, -165.015] - loss: 14.935 - mae: 43.911 - mean_q: 50.965 Interval 2341 (1170000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4747 2 episodes - episode_reward: -326.328 [-343.869, -308.786] - loss: 15.376 - mae: 43.880 - mean_q: 51.356 Interval 2342 (1170500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2197 Interval 2343 (1171000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6798 3 episodes - episode_reward: -292.691 [-340.493, -263.471] - loss: 21.570 - mae: 45.467 - mean_q: 52.237 Interval 2344 (1171500 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -1.9999 3 episodes - episode_reward: -355.496 [-493.507, -181.868] - loss: 18.047 - mae: 46.393 - mean_q: 52.786 Interval 2345 (1172000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5827 4 episodes - episode_reward: -210.563 [-407.230, -119.329] - loss: 19.303 - mae: 46.951 - mean_q: 54.268 Interval 2346 (1172500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7390 3 episodes - episode_reward: -274.616 [-319.847, -234.208] - loss: 13.615 - mae: 48.040 - mean_q: 55.245 Interval 2347 (1173000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0149 2 episodes - episode_reward: -245.422 [-322.634, -168.209] - loss: 22.029 - mae: 48.644 - mean_q: 56.294 Interval 2348 (1173500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8893 2 episodes - episode_reward: -153.495 [-180.619, -126.371] - loss: 18.164 - mae: 49.194 - mean_q: 57.061 Interval 2349 (1174000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2392 2 episodes - episode_reward: -302.206 [-382.415, -221.996] - loss: 21.654 - mae: 50.119 - mean_q: 58.319 Interval 2350 (1174500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2855 1 episodes - episode_reward: -266.196 [-266.196, -266.196] - loss: 19.314 - mae: 50.266 - mean_q: 58.717 Interval 2351 (1175000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9825 2 episodes - episode_reward: -251.093 [-287.605, -214.580] - loss: 30.882 - mae: 51.063 - mean_q: 58.630 Interval 2352 (1175500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1906 Interval 2353 (1176000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.4451 1 episodes - episode_reward: -334.922 [-334.922, -334.922] - loss: 35.326 - 
mae: 51.245 - mean_q: 59.662 Interval 2354 (1176500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3858 1 episodes - episode_reward: -182.464 [-182.464, -182.464] - loss: 19.579 - mae: 51.215 - mean_q: 60.095 Interval 2355 (1177000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2093 Interval 2356 (1177500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1226 Interval 2357 (1178000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2417 Interval 2358 (1178500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1975 Interval 2359 (1179000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1986 Interval 2360 (1179500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.6980 Interval 2361 (1180000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0702 2 episodes - episode_reward: -470.553 [-836.381, -104.725] - loss: 28.777 - mae: 54.192 - mean_q: 67.573 Interval 2362 (1180500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5750 Interval 2363 (1181000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.7808 1 episodes - episode_reward: -613.990 [-613.990, -613.990] - loss: 19.304 - mae: 54.841 - mean_q: 68.160 Interval 2364 (1181500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4979 1 episodes - episode_reward: -133.792 [-133.792, -133.792] - loss: 18.430 - mae: 54.987 - mean_q: 68.344 Interval 2365 (1182000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3001 2 episodes - episode_reward: -385.473 [-485.434, -285.513] - loss: 20.547 - mae: 55.194 - mean_q: 67.958 Interval 2366 (1182500 steps performed) 500/500 [==============================] - 4s 
7ms/step - reward: -0.0373 Interval 2367 (1183000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3764 Interval 2368 (1183500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2269 Interval 2369 (1184000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.6359 1 episodes - episode_reward: -685.939 [-685.939, -685.939] - loss: 22.722 - mae: 54.563 - mean_q: 67.176 Interval 2370 (1184500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0916 1 episodes - episode_reward: -92.325 [-92.325, -92.325] - loss: 20.788 - mae: 54.258 - mean_q: 66.685 Interval 2371 (1185000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1249 Interval 2372 (1185500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2014 1 episodes - episode_reward: -164.521 [-164.521, -164.521] - loss: 22.160 - mae: 55.207 - mean_q: 68.015 Interval 2373 (1186000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9101 1 episodes - episode_reward: -295.856 [-295.856, -295.856] - loss: 19.414 - mae: 55.613 - mean_q: 68.045 Interval 2374 (1186500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5086 1 episodes - episode_reward: -326.816 [-326.816, -326.816] - loss: 19.498 - mae: 56.524 - mean_q: 69.037 Interval 2375 (1187000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0947 Interval 2376 (1187500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2152 Interval 2377 (1188000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1511 Interval 2378 (1188500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1415 Interval 2379 (1189000 steps performed) 500/500 [==============================] - 7s 
15ms/step - reward: -0.2301 Interval 2380 (1189500 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.1262 Interval 2381 (1190000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.7796 2 episodes - episode_reward: -409.531 [-638.209, -180.853] - loss: 20.961 - mae: 58.767 - mean_q: 70.942 Interval 2382 (1190500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5458 2 episodes - episode_reward: -381.833 [-578.655, -185.011] - loss: 25.749 - mae: 58.934 - mean_q: 70.811 Interval 2383 (1191000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0141 2 episodes - episode_reward: -214.979 [-312.607, -117.351] - loss: 25.177 - mae: 58.383 - mean_q: 70.334 Interval 2384 (1191500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3436 1 episodes - episode_reward: -269.619 [-269.619, -269.619] - loss: 22.166 - mae: 58.200 - mean_q: 70.147 Interval 2385 (1192000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1930 1 episodes - episode_reward: -147.935 [-147.935, -147.935] - loss: 46.894 - mae: 58.525 - mean_q: 69.742 Interval 2386 (1192500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4908 3 episodes - episode_reward: -240.975 [-466.845, -97.319] - loss: 24.362 - mae: 58.325 - mean_q: 69.489 Interval 2387 (1193000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1113 Interval 2388 (1193500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2789 Interval 2389 (1194000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0768 Interval 2390 (1194500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1826 Interval 2391 (1195000 steps performed) 500/500 [==============================] - 5s 10ms/step 
- reward: -0.1778 Interval 2392 (1195500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0571 1 episodes - episode_reward: -471.737 [-471.737, -471.737] - loss: 37.174 - mae: 57.690 - mean_q: 69.650 Interval 2393 (1196000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4288 1 episodes - episode_reward: -207.744 [-207.744, -207.744] - loss: 21.942 - mae: 57.163 - mean_q: 69.463 Interval 2394 (1196500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1158 Interval 2395 (1197000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2496 4 episodes - episode_reward: -173.771 [-240.676, -111.566] - loss: 23.208 - mae: 57.263 - mean_q: 69.775 Interval 2396 (1197500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9059 4 episodes - episode_reward: -249.376 [-485.471, -137.356] - loss: 33.962 - mae: 57.522 - mean_q: 69.055 Interval 2397 (1198000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4179 3 episodes - episode_reward: -209.640 [-263.861, -165.771] - loss: 23.078 - mae: 57.380 - mean_q: 69.279 Interval 2398 (1198500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3619 3 episodes - episode_reward: -221.071 [-297.603, -161.920] - loss: 25.148 - mae: 58.882 - mean_q: 71.705 Interval 2399 (1199000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5300 1 episodes - episode_reward: -195.551 [-195.551, -195.551] - loss: 24.822 - mae: 57.940 - mean_q: 69.754 Interval 2400 (1199500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5926 1 episodes - episode_reward: -354.674 [-354.674, -354.674] - loss: 25.867 - mae: 58.461 - mean_q: 70.786 Interval 2401 (1200000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2417 2 episodes 
- episode_reward: -246.207 [-316.567, -175.847] - loss: 20.491 - mae: 58.586 - mean_q: 70.605 Interval 2402 (1200500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7151 2 episodes - episode_reward: -271.346 [-449.178, -93.514] - loss: 36.234 - mae: 59.298 - mean_q: 71.653 Interval 2403 (1201000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1457 Interval 2404 (1201500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.7875 1 episodes - episode_reward: -460.750 [-460.750, -460.750] - loss: 32.243 - mae: 60.567 - mean_q: 72.211 Interval 2405 (1202000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0192 1 episodes - episode_reward: -55.137 [-55.137, -55.137] - loss: 37.635 - mae: 60.916 - mean_q: 72.566 Interval 2406 (1202500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0831 1 episodes - episode_reward: -536.641 [-536.641, -536.641] - loss: 26.463 - mae: 61.770 - mean_q: 73.694 Interval 2407 (1203000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3770 1 episodes - episode_reward: -180.679 [-180.679, -180.679] - loss: 32.075 - mae: 62.351 - mean_q: 75.385 Interval 2408 (1203500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9330 2 episodes - episode_reward: -227.191 [-268.200, -186.182] - loss: 31.404 - mae: 63.553 - mean_q: 77.183 Interval 2409 (1204000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2159 Interval 2410 (1204500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6042 2 episodes - episode_reward: -144.959 [-155.903, -134.015] - loss: 42.476 - mae: 65.670 - mean_q: 79.343 Interval 2411 (1205000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5520 2 episodes - episode_reward: -405.335 
[-524.506, -286.164] - loss: 59.301 - mae: 66.901 - mean_q: 79.783 Interval 2412 (1205500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8628 3 episodes - episode_reward: -170.453 [-215.794, -123.651] - loss: 49.381 - mae: 69.920 - mean_q: 84.673 Interval 2413 (1206000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4892 2 episodes - episode_reward: -175.627 [-204.543, -146.711] - loss: 48.431 - mae: 73.127 - mean_q: 89.170 Interval 2414 (1206500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1071 Interval 2415 (1207000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7920 4 episodes - episode_reward: -89.132 [-274.338, 165.449] - loss: 59.676 - mae: 77.227 - mean_q: 93.652 Interval 2416 (1207500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4706 3 episodes - episode_reward: -94.601 [-119.947, -51.116] - loss: 50.683 - mae: 80.213 - mean_q: 99.074 Interval 2417 (1208000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3026 4 episodes - episode_reward: -126.232 [-245.014, -24.367] - loss: 53.158 - mae: 84.683 - mean_q: 105.940 Interval 2418 (1208500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0900 5 episodes - episode_reward: -131.782 [-193.007, -24.022] - loss: 56.142 - mae: 88.727 - mean_q: 111.095 Interval 2419 (1209000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4722 5 episodes - episode_reward: -130.095 [-241.847, -35.950] - loss: 68.737 - mae: 93.715 - mean_q: 118.165 Interval 2420 (1209500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9511 3 episodes - episode_reward: -166.288 [-239.134, -80.235] - loss: 69.419 - mae: 98.283 - mean_q: 123.820 Interval 2421 (1210000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -0.9241 5 episodes - episode_reward: -95.705 [-235.005, -31.769] - loss: 63.496 - mae: 101.323 - mean_q: 129.001 Interval 2422 (1210500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0622 3 episodes - episode_reward: -161.834 [-263.693, -98.423] - loss: 75.978 - mae: 105.213 - mean_q: 133.659 Interval 2423 (1211000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3606 2 episodes - episode_reward: -147.473 [-148.312, -146.633] - loss: 82.908 - mae: 107.140 - mean_q: 135.104 Interval 2424 (1211500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5539 3 episodes - episode_reward: -59.405 [-331.091, 165.747] - loss: 68.666 - mae: 110.069 - mean_q: 139.689 Interval 2425 (1212000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9185 4 episodes - episode_reward: -121.076 [-210.192, -43.701] - loss: 63.731 - mae: 112.477 - mean_q: 142.718 Interval 2426 (1212500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2743 3 episodes - episode_reward: -185.986 [-228.578, -127.549] - loss: 79.881 - mae: 112.846 - mean_q: 143.220 Interval 2427 (1213000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0255 5 episodes - episode_reward: -204.609 [-318.447, -98.688] - loss: 73.787 - mae: 113.443 - mean_q: 142.841 Interval 2428 (1213500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9020 3 episodes - episode_reward: -293.874 [-428.549, -182.207] - loss: 65.646 - mae: 113.792 - mean_q: 142.764 Interval 2429 (1214000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.5796 2 episodes - episode_reward: -623.731 [-872.255, -375.208] - loss: 57.347 - mae: 112.403 - mean_q: 139.949 Interval 2430 (1214500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -0.7767 2 episodes - episode_reward: -278.068 [-373.971, -182.165] - loss: 67.065 - mae: 111.060 - mean_q: 137.269 Interval 2431 (1215000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3866 1 episodes - episode_reward: -128.343 [-128.343, -128.343] - loss: 74.082 - mae: 111.640 - mean_q: 137.352 Interval 2432 (1215500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.8305 6 episodes - episode_reward: -333.201 [-1075.056, -109.404] - loss: 102.488 - mae: 110.041 - mean_q: 135.542 Interval 2433 (1216000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2083 2 episodes - episode_reward: -300.539 [-439.078, -161.999] - loss: 122.739 - mae: 109.167 - mean_q: 134.385 Interval 2434 (1216500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1517 Interval 2435 (1217000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1659 Interval 2436 (1217500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1248 Interval 2437 (1218000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1790 Interval 2438 (1218500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4546 2 episodes - episode_reward: -498.805 [-967.974, -29.636] - loss: 74.236 - mae: 116.852 - mean_q: 141.475 Interval 2439 (1219000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4625 2 episodes - episode_reward: -140.459 [-144.442, -136.475] - loss: 69.293 - mae: 117.873 - mean_q: 144.836 Interval 2440 (1219500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2668 1 episodes - episode_reward: -91.358 [-91.358, -91.358] - loss: 81.975 - mae: 120.295 - mean_q: 147.805 Interval 2441 (1220000 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -1.4936 3 episodes - episode_reward: -166.650 [-223.087, -115.609] - loss: 83.677 - mae: 123.711 - mean_q: 153.994 Interval 2442 (1220500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.0460 3 episodes - episode_reward: -437.943 [-613.857, -284.285] - loss: 95.710 - mae: 122.420 - mean_q: 151.023 Interval 2443 (1221000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5787 2 episodes - episode_reward: -160.951 [-187.805, -134.097] - loss: 118.850 - mae: 121.964 - mean_q: 151.695 Interval 2444 (1221500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2671 1 episodes - episode_reward: -543.402 [-543.402, -543.402] - loss: 101.096 - mae: 122.471 - mean_q: 152.497 Interval 2445 (1222000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7791 4 episodes - episode_reward: -131.621 [-187.089, -72.945] - loss: 95.186 - mae: 122.213 - mean_q: 152.356 Interval 2446 (1222500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5748 2 episodes - episode_reward: -1073.846 [-1994.560, -153.133] - loss: 164.718 - mae: 124.617 - mean_q: 155.474 Interval 2447 (1223000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2763 3 episodes - episode_reward: -397.198 [-820.593, -73.695] - loss: 123.162 - mae: 124.359 - mean_q: 156.015 Interval 2448 (1223500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4451 Interval 2449 (1224000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4163 5 episodes - episode_reward: -253.498 [-718.827, -88.107] - loss: 99.838 - mae: 120.974 - mean_q: 151.308 Interval 2450 (1224500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6025 4 episodes - episode_reward: -342.129 [-536.003, -174.804] 
- loss: 99.679 - mae: 118.978 - mean_q: 147.259 Interval 2451 (1225000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7689 2 episodes - episode_reward: -420.228 [-526.426, -314.029] - loss: 75.670 - mae: 118.319 - mean_q: 148.544 Interval 2452 (1225500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7509 3 episodes - episode_reward: -174.615 [-289.684, -106.901] - loss: 91.525 - mae: 117.784 - mean_q: 146.808 Interval 2453 (1226000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6921 1 episodes - episode_reward: -319.865 [-319.865, -319.865] - loss: 75.811 - mae: 116.647 - mean_q: 144.367 Interval 2454 (1226500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0786 2 episodes - episode_reward: -244.263 [-356.696, -131.831] - loss: 91.311 - mae: 115.632 - mean_q: 143.935 Interval 2455 (1227000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3964 2 episodes - episode_reward: -130.322 [-160.645, -100.000] - loss: 81.801 - mae: 115.711 - mean_q: 143.639 Interval 2456 (1227500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5718 2 episodes - episode_reward: -162.873 [-525.932, 200.187] - loss: 87.091 - mae: 115.452 - mean_q: 144.515 Interval 2457 (1228000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9877 1 episodes - episode_reward: -499.742 [-499.742, -499.742] - loss: 83.796 - mae: 114.492 - mean_q: 144.209 Interval 2458 (1228500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9299 3 episodes - episode_reward: -145.274 [-192.227, -70.108] - loss: 122.372 - mae: 114.744 - mean_q: 143.991 Interval 2459 (1229000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2628 2 episodes - episode_reward: -315.269 [-527.126, -103.412] - loss: 100.601 - 
mae: 113.309 - mean_q: 141.680 Interval 2460 (1229500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1477 4 episodes - episode_reward: -136.996 [-200.813, -60.266] - loss: 77.700 - mae: 113.086 - mean_q: 142.206 Interval 2461 (1230000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2386 2 episodes - episode_reward: -63.484 [-80.288, -46.680] - loss: 161.823 - mae: 110.843 - mean_q: 139.665 Interval 2462 (1230500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4209 2 episodes - episode_reward: -148.399 [-189.975, -106.823] - loss: 108.904 - mae: 109.153 - mean_q: 137.945 Interval 2463 (1231000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9797 2 episodes - episode_reward: -220.902 [-320.652, -121.151] - loss: 112.350 - mae: 107.515 - mean_q: 136.733 Interval 2464 (1231500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1979 2 episodes - episode_reward: 53.712 [-80.116, 187.539] - loss: 154.332 - mae: 108.216 - mean_q: 137.081 Interval 2465 (1232000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0750 2 episodes - episode_reward: -254.238 [-322.840, -185.635] - loss: 118.791 - mae: 104.312 - mean_q: 129.813 Interval 2466 (1232500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6330 1 episodes - episode_reward: -286.456 [-286.456, -286.456] - loss: 98.272 - mae: 105.206 - mean_q: 130.640 Interval 2467 (1233000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7531 2 episodes - episode_reward: -208.047 [-283.389, -132.705] - loss: 107.579 - mae: 106.268 - mean_q: 131.829 Interval 2468 (1233500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1989 1 episodes - episode_reward: -503.965 [-503.965, -503.965] - loss: 78.965 - mae: 103.855 - mean_q: 
127.670 Interval 2469 (1234000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6541 5 episodes - episode_reward: -184.124 [-237.940, -134.040] - loss: 101.180 - mae: 104.407 - mean_q: 128.879 Interval 2470 (1234500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1163 3 episodes - episode_reward: -128.587 [-167.369, -103.558] - loss: 92.986 - mae: 104.692 - mean_q: 129.331 Interval 2471 (1235000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4830 5 episodes - episode_reward: -191.119 [-333.459, -81.913] - loss: 97.714 - mae: 103.943 - mean_q: 127.165 Interval 2472 (1235500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0811 Interval 2473 (1236000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0351 Interval 2474 (1236500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2037 1 episodes - episode_reward: 166.559 [166.559, 166.559] - loss: 68.209 - mae: 99.861 - mean_q: 121.310 Interval 2475 (1237000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0547 1 episodes - episode_reward: -100.000 [-100.000, -100.000] - loss: 77.874 - mae: 99.304 - mean_q: 119.120 Interval 2476 (1237500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0762 Interval 2477 (1238000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0263 Interval 2478 (1238500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5751 3 episodes - episode_reward: -107.023 [-306.621, 121.366] - loss: 75.516 - mae: 96.061 - mean_q: 116.398 Interval 2479 (1239000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1686 Interval 2480 (1239500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6571 1 
episodes - episode_reward: -409.644 [-409.644, -409.644] - loss: 80.039 - mae: 91.693 - mean_q: 111.628 Interval 2481 (1240000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2532 1 episodes - episode_reward: -385.593 [-385.593, -385.593] - loss: 58.271 - mae: 90.610 - mean_q: 109.638 Interval 2482 (1240500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1008 2 episodes - episode_reward: -336.045 [-385.152, -286.937] - loss: 63.886 - mae: 89.443 - mean_q: 108.598 Interval 2483 (1241000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0263 4 episodes - episode_reward: -257.394 [-524.772, -152.050] - loss: 68.149 - mae: 89.411 - mean_q: 109.773 Interval 2484 (1241500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9949 2 episodes - episode_reward: -415.390 [-420.283, -410.498] - loss: 67.811 - mae: 89.312 - mean_q: 109.623 Interval 2485 (1242000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5151 5 episodes - episode_reward: -295.750 [-421.090, -127.941] - loss: 63.318 - mae: 88.399 - mean_q: 108.325 Interval 2486 (1242500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0618 4 episodes - episode_reward: -265.429 [-434.782, -100.000] - loss: 62.040 - mae: 87.902 - mean_q: 107.739 Interval 2487 (1243000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0908 3 episodes - episode_reward: -161.087 [-240.263, -79.718] - loss: 104.741 - mae: 88.230 - mean_q: 107.218 Interval 2488 (1243500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6544 2 episodes - episode_reward: -279.177 [-316.064, -242.291] - loss: 65.148 - mae: 87.654 - mean_q: 106.871 Interval 2489 (1244000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8742 5 episodes - episode_reward: 
-259.513 [-550.582, -117.357] - loss: 85.330 - mae: 88.475 - mean_q: 106.488 Interval 2490 (1244500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4047 Interval 2491 (1245000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9201 1 episodes - episode_reward: -485.060 [-485.060, -485.060] - loss: 83.380 - mae: 88.836 - mean_q: 107.383 Interval 2492 (1245500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3858 4 episodes - episode_reward: -209.922 [-453.324, -51.189] - loss: 59.876 - mae: 88.124 - mean_q: 108.037 Interval 2493 (1246000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1759 2 episodes - episode_reward: -285.222 [-288.490, -281.955] - loss: 74.553 - mae: 87.356 - mean_q: 106.895 Interval 2494 (1246500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7246 1 episodes - episode_reward: -301.644 [-301.644, -301.644] - loss: 86.029 - mae: 85.214 - mean_q: 104.429 Interval 2495 (1247000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3384 3 episodes - episode_reward: -350.238 [-655.647, -118.321] - loss: 87.693 - mae: 84.267 - mean_q: 102.805 Interval 2496 (1247500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5106 1 episodes - episode_reward: -396.739 [-396.739, -396.739] - loss: 59.808 - mae: 84.100 - mean_q: 102.831 Interval 2497 (1248000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7611 1 episodes - episode_reward: -402.598 [-402.598, -402.598] - loss: 85.012 - mae: 83.768 - mean_q: 102.520 Interval 2498 (1248500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8300 2 episodes - episode_reward: -122.667 [-145.334, -100.000] - loss: 79.569 - mae: 82.067 - mean_q: 99.832 Interval 2499 (1249000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.7896 1 episodes - episode_reward: -311.160 [-311.160, -311.160] - loss: 77.195 - mae: 81.690 - mean_q: 98.520 Interval 2500 (1249500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6079 4 episodes - episode_reward: -240.857 [-373.800, -120.445] - loss: 60.790 - mae: 79.940 - mean_q: 94.476 Interval 2501 (1250000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6779 3 episodes - episode_reward: -298.711 [-531.979, -100.000] - loss: 82.982 - mae: 79.243 - mean_q: 92.073 Interval 2502 (1250500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9319 3 episodes - episode_reward: -112.462 [-119.962, -100.801] - loss: 62.732 - mae: 79.668 - mean_q: 93.101 Interval 2503 (1251000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5973 3 episodes - episode_reward: -657.054 [-1323.718, -311.118] - loss: 94.377 - mae: 77.050 - mean_q: 89.586 Interval 2504 (1251500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0242 2 episodes - episode_reward: -225.364 [-353.699, -97.028] - loss: 57.603 - mae: 75.489 - mean_q: 86.810 Interval 2505 (1252000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4761 Interval 2506 (1252500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4350 Interval 2507 (1253000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2617 3 episodes - episode_reward: -713.487 [-1158.757, -320.168] - loss: 57.884 - mae: 71.696 - mean_q: 79.449 Interval 2508 (1253500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9714 1 episodes - episode_reward: -250.225 [-250.225, -250.225] - loss: 61.073 - mae: 71.506 - mean_q: 76.858 Interval 2509 (1254000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.2409 Interval 2510 (1254500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1774 1 episodes - episode_reward: -572.130 [-572.130, -572.130] - loss: 56.642 - mae: 70.435 - mean_q: 74.855 Interval 2511 (1255000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3219 3 episodes - episode_reward: -37.057 [-247.683, 220.745] - loss: 64.628 - mae: 67.482 - mean_q: 70.556 Interval 2512 (1255500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7044 1 episodes - episode_reward: -580.353 [-580.353, -580.353] - loss: 46.926 - mae: 67.921 - mean_q: 71.020 Interval 2513 (1256000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2523 3 episodes - episode_reward: -160.366 [-312.845, -13.784] - loss: 61.527 - mae: 67.514 - mean_q: 69.204 Interval 2514 (1256500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6345 5 episodes - episode_reward: -328.127 [-829.195, -100.000] - loss: 64.597 - mae: 67.061 - mean_q: 68.112 Interval 2515 (1257000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3079 3 episodes - episode_reward: -223.996 [-290.212, -115.624] - loss: 57.773 - mae: 66.168 - mean_q: 67.454 Interval 2516 (1257500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9856 1 episodes - episode_reward: -497.881 [-497.881, -497.881] - loss: 41.341 - mae: 64.747 - mean_q: 65.025 Interval 2517 (1258000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8471 4 episodes - episode_reward: -230.210 [-372.740, -146.817] - loss: 51.000 - mae: 67.205 - mean_q: 68.711 Interval 2518 (1258500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0686 1 episodes - episode_reward: -527.847 [-527.847, -527.847] - loss: 76.712 - mae: 
65.868 - mean_q: 66.908 Interval 2519 (1259000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3970 3 episodes - episode_reward: -388.370 [-558.939, -159.016] - loss: 68.725 - mae: 65.310 - mean_q: 66.644 Interval 2520 (1259500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0779 3 episodes - episode_reward: -86.885 [-108.666, -51.988] - loss: 66.456 - mae: 65.262 - mean_q: 66.753 Interval 2521 (1260000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1262 4 episodes - episode_reward: -341.291 [-517.028, -149.067] - loss: 54.587 - mae: 65.054 - mean_q: 67.454 Interval 2522 (1260500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8383 2 episodes - episode_reward: -157.773 [-173.212, -142.334] - loss: 59.022 - mae: 64.204 - mean_q: 63.930 Interval 2523 (1261000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5989 2 episodes - episode_reward: -207.638 [-275.195, -140.081] - loss: 76.481 - mae: 63.380 - mean_q: 63.048 Interval 2524 (1261500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3196 3 episodes - episode_reward: -353.588 [-448.238, -253.579] - loss: 52.528 - mae: 61.646 - mean_q: 59.492 Interval 2525 (1262000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7352 2 episodes - episode_reward: -213.102 [-268.123, -158.082] - loss: 54.986 - mae: 62.837 - mean_q: 60.890 Interval 2526 (1262500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4412 2 episodes - episode_reward: -124.055 [-158.421, -89.689] - loss: 52.669 - mae: 63.058 - mean_q: 61.142 Interval 2527 (1263000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4891 3 episodes - episode_reward: -207.312 [-392.241, -46.265] - loss: 76.561 - mae: 62.814 - mean_q: 61.840 Interval 2528 
(1263500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6230 3 episodes - episode_reward: -310.671 [-402.295, -196.291] - loss: 80.673 - mae: 63.540 - mean_q: 63.366 Interval 2529 (1264000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8609 4 episodes - episode_reward: -321.063 [-750.868, -89.531] - loss: 74.670 - mae: 64.277 - mean_q: 66.107 Interval 2530 (1264500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2775 4 episodes - episode_reward: -582.466 [-811.372, -357.375] - loss: 78.425 - mae: 65.755 - mean_q: 68.838 Interval 2531 (1265000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2009 1 episodes - episode_reward: -438.343 [-438.343, -438.343] - loss: 84.942 - mae: 67.089 - mean_q: 71.333 Interval 2532 (1265500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9548 1 episodes - episode_reward: -587.795 [-587.795, -587.795] - loss: 64.777 - mae: 67.755 - mean_q: 73.584 Interval 2533 (1266000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1496 1 episodes - episode_reward: -517.360 [-517.360, -517.360] - loss: 79.223 - mae: 70.824 - mean_q: 77.748 Interval 2534 (1266500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8729 1 episodes - episode_reward: -335.626 [-335.626, -335.626] - loss: 68.996 - mae: 74.114 - mean_q: 81.397 Interval 2535 (1267000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8996 1 episodes - episode_reward: -399.878 [-399.878, -399.878] - loss: 122.437 - mae: 76.639 - mean_q: 85.053 Interval 2536 (1267500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5124 1 episodes - episode_reward: -393.457 [-393.457, -393.457] - loss: 95.489 - mae: 78.633 - mean_q: 88.294 Interval 2537 (1268000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -2.5400 2 episodes - episode_reward: -700.477 [-914.495, -486.460] - loss: 108.007 - mae: 79.481 - mean_q: 89.313 Interval 2538 (1268500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0551 4 episodes - episode_reward: -226.480 [-341.531, -42.013] - loss: 123.701 - mae: 81.949 - mean_q: 91.508 Interval 2539 (1269000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2823 1 episodes - episode_reward: -617.953 [-617.953, -617.953] - loss: 82.224 - mae: 81.654 - mean_q: 91.120 Interval 2540 (1269500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7791 3 episodes - episode_reward: -660.124 [-1529.271, -133.248] - loss: 114.762 - mae: 83.386 - mean_q: 92.943 Interval 2541 (1270000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5363 4 episodes - episode_reward: -306.804 [-455.970, -186.409] - loss: 101.954 - mae: 83.676 - mean_q: 92.606 Interval 2542 (1270500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7250 4 episodes - episode_reward: -209.570 [-334.024, -129.439] - loss: 88.831 - mae: 84.984 - mean_q: 91.733 Interval 2543 (1271000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9011 2 episodes - episode_reward: -493.686 [-831.659, -155.713] - loss: 77.403 - mae: 85.173 - mean_q: 90.206 Interval 2544 (1271500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1311 2 episodes - episode_reward: -236.641 [-308.104, -165.178] - loss: 92.973 - mae: 84.079 - mean_q: 89.759 Interval 2545 (1272000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8317 2 episodes - episode_reward: -249.995 [-287.304, -212.686] - loss: 77.611 - mae: 83.837 - mean_q: 89.259 Interval 2546 (1272500 steps performed) 500/500 [==============================] 
- 3s 7ms/step - reward: -1.6277 2 episodes - episode_reward: -369.597 [-375.711, -363.484] - loss: 78.527 - mae: 84.059 - mean_q: 88.755 Interval 2547 (1273000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5940 2 episodes - episode_reward: -449.055 [-577.482, -320.628] - loss: 79.762 - mae: 84.241 - mean_q: 87.848 Interval 2548 (1273500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3355 3 episodes - episode_reward: -404.392 [-509.814, -307.614] - loss: 83.883 - mae: 84.265 - mean_q: 88.551 Interval 2549 (1274000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9372 1 episodes - episode_reward: -311.083 [-311.083, -311.083] - loss: 74.940 - mae: 84.707 - mean_q: 87.461 Interval 2550 (1274500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5268 2 episodes - episode_reward: -408.009 [-536.315, -279.703] - loss: 88.360 - mae: 86.718 - mean_q: 89.231 Interval 2551 (1275000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1031 1 episodes - episode_reward: -351.838 [-351.838, -351.838] - loss: 85.536 - mae: 86.279 - mean_q: 89.357 Interval 2552 (1275500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9815 1 episodes - episode_reward: -573.275 [-573.275, -573.275] - loss: 74.763 - mae: 85.940 - mean_q: 89.873 Interval 2553 (1276000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1843 1 episodes - episode_reward: -1218.660 [-1218.660, -1218.660] - loss: 85.942 - mae: 88.067 - mean_q: 90.912 Interval 2554 (1276500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9856 1 episodes - episode_reward: -477.125 [-477.125, -477.125] - loss: 74.688 - mae: 87.680 - mean_q: 90.225 Interval 2555 (1277000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0647 2 
episodes - episode_reward: -191.462 [-222.265, -160.660] - loss: 89.907 - mae: 88.237 - mean_q: 89.286 Interval 2556 (1277500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6410 2 episodes - episode_reward: -358.353 [-413.106, -303.601] - loss: 84.247 - mae: 86.114 - mean_q: 88.005 Interval 2557 (1278000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5155 2 episodes - episode_reward: -432.014 [-601.891, -262.137] - loss: 75.314 - mae: 85.304 - mean_q: 87.918 Interval 2558 (1278500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3603 2 episodes - episode_reward: -858.231 [-1605.451, -111.011] - loss: 85.433 - mae: 85.979 - mean_q: 86.022 Interval 2559 (1279000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1862 1 episodes - episode_reward: -623.370 [-623.370, -623.370] - loss: 81.412 - mae: 86.212 - mean_q: 86.762 Interval 2560 (1279500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7350 1 episodes - episode_reward: -406.798 [-406.798, -406.798] - loss: 67.677 - mae: 86.377 - mean_q: 88.764 Interval 2561 (1280000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0625 3 episodes - episode_reward: -341.427 [-398.845, -254.899] - loss: 69.495 - mae: 85.534 - mean_q: 84.584 Interval 2562 (1280500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6930 2 episodes - episode_reward: -497.183 [-558.680, -435.685] - loss: 71.807 - mae: 84.702 - mean_q: 84.058 Interval 2563 (1281000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6753 3 episodes - episode_reward: -80.806 [-147.001, -36.546] - loss: 74.469 - mae: 84.046 - mean_q: 81.845 Interval 2564 (1281500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0941 3 episodes - episode_reward: -342.950 
[-503.182, -175.104] - loss: 72.632 - mae: 83.222 - mean_q: 81.979 Interval 2565 (1282000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3127 5 episodes - episode_reward: -249.736 [-354.823, -174.042] - loss: 73.040 - mae: 82.605 - mean_q: 78.917 Interval 2566 (1282500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9373 3 episodes - episode_reward: -254.320 [-427.714, -121.602] - loss: 69.746 - mae: 83.426 - mean_q: 76.830 Interval 2567 (1283000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4671 2 episodes - episode_reward: -472.092 [-548.609, -395.575] - loss: 67.036 - mae: 83.260 - mean_q: 77.519 Interval 2568 (1283500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3341 4 episodes - episode_reward: -272.496 [-460.606, -97.252] - loss: 83.344 - mae: 82.900 - mean_q: 76.735 Interval 2569 (1284000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2948 3 episodes - episode_reward: -224.030 [-270.685, -146.143] - loss: 64.395 - mae: 82.447 - mean_q: 76.112 Interval 2570 (1284500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0765 5 episodes - episode_reward: -218.610 [-349.331, -100.000] - loss: 62.140 - mae: 81.105 - mean_q: 74.221 Interval 2571 (1285000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6822 3 episodes - episode_reward: -111.390 [-176.986, -51.390] - loss: 61.757 - mae: 82.110 - mean_q: 72.886 Interval 2572 (1285500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7430 3 episodes - episode_reward: -260.463 [-274.200, -249.967] - loss: 76.187 - mae: 81.375 - mean_q: 71.536 Interval 2573 (1286000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0308 3 episodes - episode_reward: -174.555 [-267.118, -43.524] - loss: 73.589 - 
mae: 79.212 - mean_q: 68.886 Interval 2574 (1286500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1651 5 episodes - episode_reward: -325.582 [-805.282, -61.187] - loss: 71.967 - mae: 77.061 - mean_q: 66.545 Interval 2575 (1287000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8610 3 episodes - episode_reward: -454.257 [-903.071, -178.678] - loss: 61.388 - mae: 77.673 - mean_q: 64.846 Interval 2576 (1287500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5657 3 episodes - episode_reward: -307.348 [-371.242, -267.800] - loss: 70.636 - mae: 76.056 - mean_q: 61.957 Interval 2577 (1288000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8464 1 episodes - episode_reward: -440.183 [-440.183, -440.183] - loss: 64.482 - mae: 75.168 - mean_q: 60.932 Interval 2578 (1288500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0867 Interval 2579 (1289000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0534 Interval 2580 (1289500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5477 3 episodes - episode_reward: -261.805 [-485.393, 76.250] - loss: 93.260 - mae: 76.435 - mean_q: 61.597 Interval 2581 (1290000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1925 2 episodes - episode_reward: -310.268 [-410.452, -210.084] - loss: 76.410 - mae: 76.001 - mean_q: 60.638 Interval 2582 (1290500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2270 2 episodes - episode_reward: -284.953 [-337.820, -232.086] - loss: 88.110 - mae: 75.538 - mean_q: 59.073 Interval 2583 (1291000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4052 3 episodes - episode_reward: -408.249 [-538.873, -232.989] - loss: 76.323 - mae: 74.984 - mean_q: 60.190 
Interval 2584 (1291500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5803 1 episodes - episode_reward: -228.751 [-228.751, -228.751] - loss: 79.442 - mae: 75.061 - mean_q: 59.404 Interval 2585 (1292000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0863 1 episodes - episode_reward: -470.960 [-470.960, -470.960] - loss: 78.487 - mae: 74.119 - mean_q: 58.449 Interval 2586 (1292500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4817 2 episodes - episode_reward: -421.836 [-490.112, -353.560] - loss: 67.001 - mae: 72.683 - mean_q: 60.182 Interval 2587 (1293000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3033 1 episodes - episode_reward: -560.315 [-560.315, -560.315] - loss: 63.674 - mae: 72.216 - mean_q: 59.574 Interval 2588 (1293500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5596 2 episodes - episode_reward: -168.029 [-216.979, -119.080] - loss: 57.784 - mae: 71.645 - mean_q: 58.951 Interval 2589 (1294000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6515 1 episodes - episode_reward: -258.263 [-258.263, -258.263] - loss: 55.340 - mae: 72.047 - mean_q: 60.804 Interval 2590 (1294500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2920 Interval 2591 (1295000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2409 2 episodes - episode_reward: -405.694 [-650.284, -161.105] - loss: 54.664 - mae: 72.407 - mean_q: 60.687 Interval 2592 (1295500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3129 1 episodes - episode_reward: -594.770 [-594.770, -594.770] - loss: 62.655 - mae: 72.432 - mean_q: 60.830 Interval 2593 (1296000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0342 1 episodes - episode_reward: 
-562.745 [-562.745, -562.745] - loss: 59.939 - mae: 72.285 - mean_q: 60.648 Interval 2594 (1296500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7892 2 episodes - episode_reward: -439.034 [-449.805, -428.263] - loss: 50.976 - mae: 72.651 - mean_q: 60.941 Interval 2595 (1297000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0950 3 episodes - episode_reward: -284.848 [-477.761, -158.560] - loss: 53.445 - mae: 73.288 - mean_q: 60.641 Interval 2596 (1297500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3874 3 episodes - episode_reward: -342.223 [-467.472, -236.749] - loss: 55.040 - mae: 73.602 - mean_q: 60.297 Interval 2597 (1298000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7965 2 episodes - episode_reward: -160.565 [-240.951, -80.179] - loss: 55.666 - mae: 75.094 - mean_q: 63.403 Interval 2598 (1298500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3291 3 episodes - episode_reward: -186.083 [-243.002, -84.225] - loss: 56.433 - mae: 75.177 - mean_q: 61.908 Interval 2599 (1299000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6517 2 episodes - episode_reward: -436.052 [-507.923, -364.182] - loss: 55.285 - mae: 74.951 - mean_q: 62.082 Interval 2600 (1299500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8674 3 episodes - episode_reward: -355.985 [-483.426, -271.304] - loss: 64.527 - mae: 74.774 - mean_q: 58.881 Interval 2601 (1300000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3955 4 episodes - episode_reward: -167.151 [-209.321, -122.914] - loss: 69.086 - mae: 74.540 - mean_q: 58.656 Interval 2602 (1300500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6083 3 episodes - episode_reward: -273.314 [-490.534, -150.562] - loss: 
60.647 - mae: 75.508 - mean_q: 56.526 Interval 2603 (1301000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2219 1 episodes - episode_reward: -574.842 [-574.842, -574.842] - loss: 66.925 - mae: 74.657 - mean_q: 57.886 Interval 2604 (1301500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9140 4 episodes - episode_reward: -159.388 [-247.081, -97.549] - loss: 53.551 - mae: 73.914 - mean_q: 55.971 Interval 2605 (1302000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2463 2 episodes - episode_reward: -518.382 [-958.397, -78.366] - loss: 79.113 - mae: 74.703 - mean_q: 55.377 Interval 2606 (1302500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6489 5 episodes - episode_reward: -256.110 [-461.675, -91.043] - loss: 60.593 - mae: 74.718 - mean_q: 55.151 Interval 2607 (1303000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7525 1 episodes - episode_reward: -366.942 [-366.942, -366.942] - loss: 64.233 - mae: 75.111 - mean_q: 56.373 Interval 2608 (1303500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1747 2 episodes - episode_reward: -546.315 [-947.707, -144.922] - loss: 72.842 - mae: 74.527 - mean_q: 56.413 Interval 2609 (1304000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8841 2 episodes - episode_reward: -224.538 [-262.852, -186.224] - loss: 65.606 - mae: 74.510 - mean_q: 55.696 Interval 2610 (1304500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3404 5 episodes - episode_reward: -233.325 [-343.724, -103.586] - loss: 70.984 - mae: 73.823 - mean_q: 54.906 Interval 2611 (1305000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3763 4 episodes - episode_reward: -300.547 [-419.115, -142.180] - loss: 66.778 - mae: 74.454 - mean_q: 56.619 
Interval 2612 (1305500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4649 2 episodes - episode_reward: -240.198 [-248.641, -231.754] - loss: 61.729 - mae: 74.709 - mean_q: 54.624 Interval 2613 (1306000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1262 3 episodes - episode_reward: -254.097 [-419.777, -124.046] - loss: 71.318 - mae: 75.520 - mean_q: 57.559 Interval 2614 (1306500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0047 2 episodes - episode_reward: -193.237 [-300.591, -85.883] - loss: 66.370 - mae: 76.545 - mean_q: 58.853 Interval 2615 (1307000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3230 3 episodes - episode_reward: -272.417 [-392.375, -94.527] - loss: 75.884 - mae: 77.585 - mean_q: 59.647 Interval 2616 (1307500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1859 Interval 2617 (1308000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7340 2 episodes - episode_reward: -202.810 [-284.969, -120.650] - loss: 66.424 - mae: 81.205 - mean_q: 62.554 Interval 2618 (1308500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4836 2 episodes - episode_reward: -364.837 [-549.881, -179.793] - loss: 71.449 - mae: 81.626 - mean_q: 65.693 Interval 2619 (1309000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1754 3 episodes - episode_reward: -390.138 [-674.918, -146.151] - loss: 69.112 - mae: 82.975 - mean_q: 68.130 Interval 2620 (1309500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3516 1 episodes - episode_reward: -147.777 [-147.777, -147.777] - loss: 83.633 - mae: 83.498 - mean_q: 67.236 Interval 2621 (1310000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9896 7 episodes - episode_reward: 
-142.091 [-222.051, -79.650] - loss: 58.994 - mae: 81.329 - mean_q: 66.310 Interval 2622 (1310500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4451 2 episodes - episode_reward: -121.508 [-166.979, -76.037] - loss: 77.308 - mae: 82.027 - mean_q: 64.819 Interval 2623 (1311000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9808 4 episodes - episode_reward: -119.240 [-203.663, -85.262] - loss: 74.742 - mae: 81.792 - mean_q: 63.979 Interval 2624 (1311500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3946 1 episodes - episode_reward: -160.387 [-160.387, -160.387] - loss: 85.572 - mae: 81.137 - mean_q: 65.044 Interval 2625 (1312000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2313 Interval 2626 (1312500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4143 1 episodes - episode_reward: -789.116 [-789.116, -789.116] - loss: 66.340 - mae: 78.418 - mean_q: 61.722 Interval 2627 (1313000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1127 Interval 2628 (1313500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3923 1 episodes - episode_reward: -743.074 [-743.074, -743.074] - loss: 66.526 - mae: 77.195 - mean_q: 60.450 Interval 2629 (1314000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2228 Interval 2630 (1314500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0823 2 episodes - episode_reward: -375.772 [-410.810, -340.735] - loss: 57.568 - mae: 73.727 - mean_q: 58.295 Interval 2631 (1315000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5659 1 episodes - episode_reward: -113.294 [-113.294, -113.294] - loss: 56.530 - mae: 73.357 - mean_q: 57.457 Interval 2632 (1315500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.6675 1 episodes - episode_reward: -337.861 [-337.861, -337.861] - loss: 60.540 - mae: 74.057 - mean_q: 55.692 Interval 2633 (1316000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6452 1 episodes - episode_reward: -456.776 [-456.776, -456.776] - loss: 63.259 - mae: 74.273 - mean_q: 56.601 Interval 2634 (1316500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9976 3 episodes - episode_reward: -340.473 [-446.090, -264.014] - loss: 70.364 - mae: 73.681 - mean_q: 56.280 Interval 2635 (1317000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3490 2 episodes - episode_reward: -309.070 [-442.208, -175.932] - loss: 73.246 - mae: 72.762 - mean_q: 56.454 Interval 2636 (1317500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1077 1 episodes - episode_reward: -624.592 [-624.592, -624.592] - loss: 64.844 - mae: 73.395 - mean_q: 55.585 Interval 2637 (1318000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6210 2 episodes - episode_reward: -124.839 [-135.670, -114.008] - loss: 61.673 - mae: 74.412 - mean_q: 56.419 Interval 2638 (1318500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.1727 1 episodes - episode_reward: -917.507 [-917.507, -917.507] - loss: 67.304 - mae: 75.843 - mean_q: 58.851 Interval 2639 (1319000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9001 2 episodes - episode_reward: -309.672 [-519.345, -100.000] - loss: 72.403 - mae: 74.760 - mean_q: 58.446 Interval 2640 (1319500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0727 5 episodes - episode_reward: -196.706 [-325.810, -110.701] - loss: 64.680 - mae: 74.137 - mean_q: 60.625 Interval 2641 (1320000 steps performed) 500/500 [==============================] - 4s 
7ms/step - reward: -2.4590 4 episodes - episode_reward: -329.469 [-449.057, -101.898] - loss: 56.535 - mae: 73.762 - mean_q: 59.345 Interval 2642 (1320500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7960 3 episodes - episode_reward: -285.801 [-442.177, -124.368] - loss: 69.829 - mae: 73.579 - mean_q: 58.164 Interval 2643 (1321000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5748 3 episodes - episode_reward: -269.066 [-460.390, -117.661] - loss: 74.054 - mae: 72.293 - mean_q: 58.428 Interval 2644 (1321500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9347 2 episodes - episode_reward: -230.693 [-316.293, -145.092] - loss: 62.516 - mae: 71.411 - mean_q: 57.144 Interval 2645 (1322000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0869 6 episodes - episode_reward: -243.118 [-558.711, -124.384] - loss: 58.859 - mae: 70.803 - mean_q: 55.373 Interval 2646 (1322500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0103 2 episodes - episode_reward: -243.250 [-248.243, -238.257] - loss: 57.987 - mae: 70.920 - mean_q: 55.756 Interval 2647 (1323000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7130 3 episodes - episode_reward: -324.160 [-586.315, -80.014] - loss: 59.751 - mae: 71.685 - mean_q: 56.577 Interval 2648 (1323500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8014 5 episodes - episode_reward: -183.190 [-278.546, -125.105] - loss: 60.331 - mae: 71.716 - mean_q: 57.019 Interval 2649 (1324000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3547 1 episodes - episode_reward: -130.213 [-130.213, -130.213] - loss: 80.678 - mae: 72.371 - mean_q: 59.361 Interval 2650 (1324500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9137 3 episodes 
- episode_reward: -158.964 [-290.002, -92.704] - loss: 71.974 - mae: 73.044 - mean_q: 60.404 Interval 2651 (1325000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6881 2 episodes - episode_reward: -196.496 [-225.280, -167.712] - loss: 76.014 - mae: 74.422 - mean_q: 63.600 Interval 2652 (1325500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6900 6 episodes - episode_reward: -137.292 [-211.748, -98.041] - loss: 59.172 - mae: 75.438 - mean_q: 63.627 Interval 2653 (1326000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5474 2 episodes - episode_reward: -146.392 [-193.682, -99.103] - loss: 68.998 - mae: 75.736 - mean_q: 66.186 Interval 2654 (1326500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6620 2 episodes - episode_reward: -139.202 [-171.158, -107.246] - loss: 83.389 - mae: 77.488 - mean_q: 69.498 Interval 2655 (1327000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8600 3 episodes - episode_reward: -145.641 [-203.907, -99.186] - loss: 66.712 - mae: 78.290 - mean_q: 69.838 Interval 2656 (1327500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1264 3 episodes - episode_reward: -194.603 [-256.729, -100.000] - loss: 66.749 - mae: 80.637 - mean_q: 72.823 Interval 2657 (1328000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8552 3 episodes - episode_reward: -145.729 [-166.982, -134.682] - loss: 76.967 - mae: 82.714 - mean_q: 76.289 Interval 2658 (1328500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9507 2 episodes - episode_reward: -257.738 [-308.333, -207.143] - loss: 72.451 - mae: 84.319 - mean_q: 77.734 Interval 2659 (1329000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6137 2 episodes - episode_reward: -138.956 [-155.884, 
-122.028] - loss: 78.297 - mae: 86.117 - mean_q: 80.342 Interval 2660 (1329500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6499 1 episodes - episode_reward: -255.960 [-255.960, -255.960] - loss: 75.198 - mae: 87.061 - mean_q: 82.298 Interval 2661 (1330000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0714 2 episodes - episode_reward: -258.424 [-299.875, -216.973] - loss: 74.348 - mae: 88.758 - mean_q: 83.114 Interval 2662 (1330500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7200 1 episodes - episode_reward: -478.776 [-478.776, -478.776] - loss: 89.506 - mae: 89.189 - mean_q: 82.673 Interval 2663 (1331000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9729 2 episodes - episode_reward: -175.237 [-255.977, -94.497] - loss: 71.217 - mae: 89.463 - mean_q: 85.671 Interval 2664 (1331500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7748 2 episodes - episode_reward: -499.066 [-546.370, -451.763] - loss: 73.088 - mae: 89.550 - mean_q: 86.276 Interval 2665 (1332000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9480 4 episodes - episode_reward: -231.106 [-392.552, -124.939] - loss: 81.458 - mae: 89.307 - mean_q: 85.530 Interval 2666 (1332500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0857 1 episodes - episode_reward: -290.555 [-290.555, -290.555] - loss: 75.783 - mae: 87.998 - mean_q: 85.628 Interval 2667 (1333000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7477 3 episodes - episode_reward: -355.506 [-473.433, -190.729] - loss: 82.295 - mae: 88.132 - mean_q: 85.046 Interval 2668 (1333500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7617 1 episodes - episode_reward: -500.554 [-500.554, -500.554] - loss: 72.606 - mae: 87.448 
- mean_q: 86.058 Interval 2669 (1334000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.0592 7 episodes - episode_reward: -285.458 [-590.501, -95.327] - loss: 73.678 - mae: 89.939 - mean_q: 90.443 Interval 2670 (1334500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8561 1 episodes - episode_reward: -321.625 [-321.625, -321.625] - loss: 71.622 - mae: 91.410 - mean_q: 90.362 Interval 2671 (1335000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1017 Interval 2672 (1335500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0303 2 episodes - episode_reward: -274.267 [-414.286, -134.248] - loss: 84.803 - mae: 95.175 - mean_q: 95.566 Interval 2673 (1336000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1043 Interval 2674 (1336500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1807 Interval 2675 (1337000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: 0.0594 Interval 2676 (1337500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0721 Interval 2677 (1338000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -1.0208 1 episodes - episode_reward: -671.152 [-671.152, -671.152] - loss: 92.615 - mae: 104.087 - mean_q: 111.239 Interval 2678 (1338500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3740 3 episodes - episode_reward: -238.288 [-342.397, -101.734] - loss: 81.910 - mae: 104.988 - mean_q: 113.466 Interval 2679 (1339000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2887 2 episodes - episode_reward: -321.925 [-597.866, -45.984] - loss: 93.207 - mae: 106.827 - mean_q: 113.096 Interval 2680 (1339500 steps performed) 500/500 [==============================] - 3s 7ms/step - 
reward: -0.7618 1 episodes - episode_reward: -394.003 [-394.003, -394.003] - loss: 82.218 - mae: 107.165 - mean_q: 115.210 Interval 2681 (1340000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0604 Interval 2682 (1340500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2344 Interval 2683 (1341000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1521 1 episodes - episode_reward: -194.909 [-194.909, -194.909] - loss: 81.114 - mae: 114.342 - mean_q: 124.641 Interval 2684 (1341500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3312 2 episodes - episode_reward: -233.392 [-238.559, -228.225] - loss: 84.785 - mae: 115.446 - mean_q: 127.827 Interval 2685 (1342000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2200 1 episodes - episode_reward: -413.535 [-413.535, -413.535] - loss: 83.179 - mae: 117.474 - mean_q: 130.645 Interval 2686 (1342500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0262 2 episodes - episode_reward: -432.644 [-602.665, -262.624] - loss: 74.564 - mae: 118.427 - mean_q: 132.565 Interval 2687 (1343000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6519 4 episodes - episode_reward: -336.490 [-972.727, -106.497] - loss: 74.266 - mae: 117.925 - mean_q: 131.897 Interval 2688 (1343500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8386 1 episodes - episode_reward: -162.968 [-162.968, -162.968] - loss: 83.955 - mae: 119.396 - mean_q: 135.078 Interval 2689 (1344000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8950 3 episodes - episode_reward: -250.380 [-393.441, -95.376] - loss: 82.290 - mae: 119.314 - mean_q: 132.168 Interval 2690 (1344500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 
-1.3934 4 episodes - episode_reward: -171.523 [-291.915, -100.000] - loss: 83.868 - mae: 119.935 - mean_q: 133.976 Interval 2691 (1345000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4426 2 episodes - episode_reward: -284.041 [-431.539, -136.543] - loss: 91.942 - mae: 119.912 - mean_q: 135.824 Interval 2692 (1345500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1171 2 episodes - episode_reward: -324.126 [-396.915, -251.337] - loss: 84.560 - mae: 119.002 - mean_q: 132.498 Interval 2693 (1346000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8101 1 episodes - episode_reward: -477.032 [-477.032, -477.032] - loss: 96.069 - mae: 119.120 - mean_q: 133.843 Interval 2694 (1346500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8877 2 episodes - episode_reward: -204.242 [-267.506, -140.978] - loss: 83.538 - mae: 119.006 - mean_q: 134.234 Interval 2695 (1347000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5841 2 episodes - episode_reward: -174.820 [-206.925, -142.715] - loss: 98.465 - mae: 119.693 - mean_q: 134.878 Interval 2696 (1347500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4499 1 episodes - episode_reward: -691.813 [-691.813, -691.813] - loss: 95.568 - mae: 121.556 - mean_q: 139.227 Interval 2697 (1348000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1311 1 episodes - episode_reward: -388.664 [-388.664, -388.664] - loss: 89.167 - mae: 121.403 - mean_q: 139.271 Interval 2698 (1348500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0037 2 episodes - episode_reward: -824.516 [-922.652, -726.380] - loss: 89.711 - mae: 121.692 - mean_q: 140.034 Interval 2699 (1349000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8639 1 episodes 
- episode_reward: -417.844 [-417.844, -417.844] - loss: 102.325 - mae: 122.876 - mean_q: 142.626 Interval 2700 (1349500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2049 2 episodes - episode_reward: -558.267 [-975.478, -141.056] - loss: 90.769 - mae: 125.053 - mean_q: 145.333 Interval 2701 (1350000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.9804 1 episodes - episode_reward: -2045.133 [-2045.133, -2045.133] - loss: 115.228 - mae: 128.503 - mean_q: 151.194 Interval 2702 (1350500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6913 3 episodes - episode_reward: -78.116 [-208.278, -0.079] - loss: 115.191 - mae: 132.625 - mean_q: 157.819 Interval 2703 (1351000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5952 3 episodes - episode_reward: -456.252 [-619.396, -285.346] - loss: 110.538 - mae: 133.604 - mean_q: 159.193 Interval 2704 (1351500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4977 2 episodes - episode_reward: -135.530 [-170.592, -100.467] - loss: 90.003 - mae: 135.359 - mean_q: 162.163 Interval 2705 (1352000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1757 2 episodes - episode_reward: -703.218 [-1282.241, -124.195] - loss: 104.017 - mae: 135.154 - mean_q: 162.720 Interval 2706 (1352500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.6371 2 episodes - episode_reward: -2009.039 [-3444.241, -573.836] - loss: 113.668 - mae: 137.931 - mean_q: 164.563 Interval 2707 (1353000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.1136 1 episodes - episode_reward: -1026.817 [-1026.817, -1026.817] - loss: 90.298 - mae: 138.828 - mean_q: 166.084 Interval 2708 (1353500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3642 2 episodes - 
episode_reward: -509.693 [-905.211, -114.174] - loss: 122.540 - mae: 138.234 - mean_q: 164.458 Interval 2709 (1354000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.5845 4 episodes - episode_reward: -287.202 [-553.496, -123.838] - loss: 116.864 - mae: 137.526 - mean_q: 163.541 Interval 2710 (1354500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9336 1 episodes - episode_reward: -744.149 [-744.149, -744.149] - loss: 137.666 - mae: 136.854 - mean_q: 161.519 Interval 2711 (1355000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9793 1 episodes - episode_reward: -575.581 [-575.581, -575.581] - loss: 108.472 - mae: 135.009 - mean_q: 160.575 Interval 2712 (1355500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4719 2 episodes - episode_reward: -509.867 [-860.448, -159.286] - loss: 136.401 - mae: 134.828 - mean_q: 160.245 Interval 2713 (1356000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7040 1 episodes - episode_reward: -504.293 [-504.293, -504.293] - loss: 92.027 - mae: 134.047 - mean_q: 159.102 Interval 2714 (1356500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1298 2 episodes - episode_reward: -241.826 [-364.239, -119.412] - loss: 103.821 - mae: 132.039 - mean_q: 156.151 Interval 2715 (1357000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1848 1 episodes - episode_reward: -263.163 [-263.163, -263.163] - loss: 142.448 - mae: 131.452 - mean_q: 156.422 Interval 2716 (1357500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0567 1 episodes - episode_reward: -533.229 [-533.229, -533.229] - loss: 95.536 - mae: 130.544 - mean_q: 155.174 Interval 2717 (1358000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5931 1 episodes - 
episode_reward: -956.686 [-956.686, -956.686] - loss: 94.819 - mae: 129.235 - mean_q: 154.118 Interval 2718 (1358500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4621 2 episodes - episode_reward: -495.667 [-521.403, -469.931] - loss: 96.955 - mae: 130.179 - mean_q: 154.745 Interval 2719 (1359000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.7755 1 episodes - episode_reward: -814.564 [-814.564, -814.564] - loss: 90.647 - mae: 130.530 - mean_q: 156.892 Interval 2720 (1359500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2929 Interval 2721 (1360000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4454 1 episodes - episode_reward: -448.149 [-448.149, -448.149] - loss: 97.339 - mae: 130.228 - mean_q: 155.681 Interval 2722 (1360500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8477 1 episodes - episode_reward: -258.908 [-258.908, -258.908] - loss: 106.132 - mae: 131.768 - mean_q: 157.067 Interval 2723 (1361000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.7985 4 episodes - episode_reward: -507.675 [-762.353, -404.915] - loss: 92.213 - mae: 131.018 - mean_q: 154.341 Interval 2724 (1361500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8111 2 episodes - episode_reward: -199.455 [-294.026, -104.885] - loss: 107.261 - mae: 131.156 - mean_q: 155.379 Interval 2725 (1362000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1313 2 episodes - episode_reward: -611.390 [-1131.866, -90.913] - loss: 79.687 - mae: 129.222 - mean_q: 151.911 Interval 2726 (1362500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0202 4 episodes - episode_reward: -471.702 [-765.585, -175.034] - loss: 87.356 - mae: 128.451 - mean_q: 150.117 Interval 2727 (1363000 steps 
performed) 500/500 [==============================] - 5s 10ms/step - reward: -2.2036 5 episodes - episode_reward: -205.593 [-453.818, -100.000] - loss: 105.895 - mae: 126.154 - mean_q: 145.672 Interval 2728 (1363500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -1.9323 3 episodes - episode_reward: -341.156 [-553.161, -138.704] - loss: 86.190 - mae: 124.144 - mean_q: 141.457 Interval 2729 (1364000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0014 1 episodes - episode_reward: -285.536 [-285.536, -285.536] - loss: 109.818 - mae: 122.503 - mean_q: 139.354 Interval 2730 (1364500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0139 1 episodes - episode_reward: -416.823 [-416.823, -416.823] - loss: 88.190 - mae: 122.920 - mean_q: 138.598 Interval 2731 (1365000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6954 3 episodes - episode_reward: -395.066 [-632.609, -253.284] - loss: 88.526 - mae: 123.879 - mean_q: 139.670 Interval 2732 (1365500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3166 Interval 2733 (1366000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6271 2 episodes - episode_reward: -229.047 [-323.286, -134.808] - loss: 103.874 - mae: 124.892 - mean_q: 141.600 Interval 2734 (1366500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7207 1 episodes - episode_reward: -68.316 [-68.316, -68.316] - loss: 101.386 - mae: 126.798 - mean_q: 144.237 Interval 2735 (1367000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9802 2 episodes - episode_reward: -350.608 [-490.866, -210.350] - loss: 88.381 - mae: 127.350 - mean_q: 143.713 Interval 2736 (1367500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1104 3 episodes - episode_reward: -214.743 
[-285.187, -150.570] - loss: 96.321 - mae: 128.205 - mean_q: 143.344 Interval 2737 (1368000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7993 2 episodes - episode_reward: -200.162 [-366.681, -33.644] - loss: 99.528 - mae: 128.730 - mean_q: 142.557 Interval 2738 (1368500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6970 2 episodes - episode_reward: -160.843 [-176.973, -144.713] - loss: 124.852 - mae: 128.811 - mean_q: 142.129 Interval 2739 (1369000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9029 1 episodes - episode_reward: -1373.782 [-1373.782, -1373.782] - loss: 100.341 - mae: 128.895 - mean_q: 140.957 Interval 2740 (1369500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4407 3 episodes - episode_reward: -261.780 [-281.952, -239.037] - loss: 94.549 - mae: 126.220 - mean_q: 135.160 Interval 2741 (1370000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6820 2 episodes - episode_reward: -607.021 [-1080.504, -133.538] - loss: 90.360 - mae: 124.862 - mean_q: 132.697 Interval 2742 (1370500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2431 7 episodes - episode_reward: -182.003 [-276.921, -95.682] - loss: 117.409 - mae: 124.667 - mean_q: 133.678 Interval 2743 (1371000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7456 Interval 2744 (1371500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5304 4 episodes - episode_reward: -400.784 [-753.962, -194.270] - loss: 92.417 - mae: 120.820 - mean_q: 127.649 Interval 2745 (1372000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5849 2 episodes - episode_reward: -151.933 [-172.672, -131.195] - loss: 99.129 - mae: 119.855 - mean_q: 125.793 Interval 2746 (1372500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -3.3711 3 episodes - episode_reward: -532.071 [-1138.966, -100.000] - loss: 90.462 - mae: 117.883 - mean_q: 120.513 Interval 2747 (1373000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5854 3 episodes - episode_reward: -304.741 [-356.550, -239.363] - loss: 113.676 - mae: 116.275 - mean_q: 118.665 Interval 2748 (1373500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7471 Interval 2749 (1374000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6381 3 episodes - episode_reward: -557.839 [-1056.283, -134.200] - loss: 100.875 - mae: 113.597 - mean_q: 118.022 Interval 2750 (1374500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5666 2 episodes - episode_reward: -415.876 [-457.119, -374.633] - loss: 141.434 - mae: 114.371 - mean_q: 116.944 Interval 2751 (1375000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.6464 3 episodes - episode_reward: -592.101 [-1288.474, -92.878] - loss: 127.184 - mae: 112.008 - mean_q: 114.378 Interval 2752 (1375500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.8398 5 episodes - episode_reward: -288.152 [-456.167, -100.000] - loss: 104.564 - mae: 112.419 - mean_q: 113.825 Interval 2753 (1376000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2236 2 episodes - episode_reward: -505.625 [-562.709, -448.541] - loss: 113.739 - mae: 112.200 - mean_q: 113.271 Interval 2754 (1376500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4278 2 episodes - episode_reward: -384.711 [-488.707, -280.714] - loss: 111.733 - mae: 111.501 - mean_q: 114.630 Interval 2755 (1377000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3152 1 episodes - episode_reward: -626.305 [-626.305, 
-626.305] - loss: 142.543 - mae: 113.045 - mean_q: 115.051 Interval 2756 (1377500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.7763 3 episodes - episode_reward: -410.775 [-565.387, -219.011] - loss: 99.220 - mae: 112.798 - mean_q: 114.730 Interval 2757 (1378000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0135 3 episodes - episode_reward: -405.541 [-508.748, -322.306] - loss: 164.091 - mae: 114.504 - mean_q: 118.951 Interval 2758 (1378500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.2839 7 episodes - episode_reward: -306.027 [-581.276, -100.000] - loss: 106.127 - mae: 115.130 - mean_q: 120.523 Interval 2759 (1379000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.6762 3 episodes - episode_reward: -427.033 [-632.138, -104.687] - loss: 205.638 - mae: 119.473 - mean_q: 124.943 Interval 2760 (1379500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7904 1 episodes - episode_reward: -993.214 [-993.214, -993.214] - loss: 158.798 - mae: 120.469 - mean_q: 128.206 Interval 2761 (1380000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0003 3 episodes - episode_reward: -300.477 [-409.364, -235.911] - loss: 138.053 - mae: 120.360 - mean_q: 129.397 Interval 2762 (1380500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9731 2 episodes - episode_reward: -481.472 [-559.286, -403.657] - loss: 115.566 - mae: 124.729 - mean_q: 137.767 Interval 2763 (1381000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.2935 3 episodes - episode_reward: -409.363 [-799.557, 35.057] - loss: 151.461 - mae: 128.568 - mean_q: 141.986 Interval 2764 (1381500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3737 2 episodes - episode_reward: -303.543 [-312.914, -294.173] - 
loss: 197.534 - mae: 130.893 - mean_q: 144.640 Interval 2765 (1382000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7571 2 episodes - episode_reward: -311.804 [-450.227, -173.381] - loss: 137.888 - mae: 130.058 - mean_q: 143.182 Interval 2766 (1382500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5815 3 episodes - episode_reward: -366.599 [-679.251, -88.222] - loss: 168.269 - mae: 135.070 - mean_q: 151.225 Interval 2767 (1383000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3620 3 episodes - episode_reward: -232.649 [-310.232, -142.161] - loss: 148.529 - mae: 134.401 - mean_q: 148.083 Interval 2768 (1383500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1382 3 episodes - episode_reward: -178.404 [-336.964, -98.247] - loss: 192.106 - mae: 136.963 - mean_q: 153.275 Interval 2769 (1384000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1522 3 episodes - episode_reward: -195.543 [-341.932, -61.997] - loss: 150.829 - mae: 139.378 - mean_q: 157.929 Interval 2770 (1384500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5431 5 episodes - episode_reward: -145.594 [-189.228, -100.000] - loss: 178.382 - mae: 140.375 - mean_q: 162.107 Interval 2771 (1385000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9797 3 episodes - episode_reward: -166.533 [-219.554, -126.038] - loss: 185.175 - mae: 145.589 - mean_q: 169.947 Interval 2772 (1385500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7943 5 episodes - episode_reward: -192.068 [-259.540, -126.741] - loss: 169.521 - mae: 149.128 - mean_q: 174.240 Interval 2773 (1386000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9825 3 episodes - episode_reward: -324.043 [-742.295, -100.000] - loss: 177.972 
- mae: 151.658 - mean_q: 176.963 Interval 2774 (1386500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1886 2 episodes - episode_reward: -231.126 [-285.381, -176.870] - loss: 189.503 - mae: 152.445 - mean_q: 176.403 Interval 2775 (1387000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6717 4 episodes - episode_reward: -252.383 [-268.227, -234.173] - loss: 186.270 - mae: 155.292 - mean_q: 180.656 Interval 2776 (1387500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4378 3 episodes - episode_reward: -213.292 [-307.737, -69.601] - loss: 162.669 - mae: 155.716 - mean_q: 180.120 Interval 2777 (1388000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8575 3 episodes - episode_reward: -253.634 [-341.486, -165.489] - loss: 166.487 - mae: 159.579 - mean_q: 185.467 Interval 2778 (1388500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4170 5 episodes - episode_reward: -287.830 [-414.098, -162.874] - loss: 146.101 - mae: 162.411 - mean_q: 187.912 Interval 2779 (1389000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2386 5 episodes - episode_reward: -206.184 [-314.429, -94.464] - loss: 162.541 - mae: 166.381 - mean_q: 191.447 Interval 2780 (1389500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4911 2 episodes - episode_reward: -351.469 [-488.125, -214.813] - loss: 165.731 - mae: 170.303 - mean_q: 195.266 Interval 2781 (1390000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1979 5 episodes - episode_reward: -239.782 [-506.419, -100.927] - loss: 185.374 - mae: 170.171 - mean_q: 192.961 Interval 2782 (1390500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8126 4 episodes - episode_reward: -232.844 [-372.044, -100.000] - loss: 150.952 - mae: 
173.855 - mean_q: 199.321 Interval 2783 (1391000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1590 6 episodes - episode_reward: -179.807 [-442.553, -42.231] - loss: 201.782 - mae: 176.389 - mean_q: 199.471 Interval 2784 (1391500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4462 4 episodes - episode_reward: -282.486 [-381.123, -176.353] - loss: 224.093 - mae: 176.618 - mean_q: 199.276 Interval 2785 (1392000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0294 2 episodes - episode_reward: -322.633 [-499.346, -145.919] - loss: 246.140 - mae: 181.987 - mean_q: 209.149 Interval 2786 (1392500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3141 4 episodes - episode_reward: -341.792 [-715.551, -98.949] - loss: 248.576 - mae: 184.201 - mean_q: 210.742 Interval 2787 (1393000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8955 3 episodes - episode_reward: -281.278 [-359.794, -204.727] - loss: 217.822 - mae: 190.167 - mean_q: 219.216 Interval 2788 (1393500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4997 5 episodes - episode_reward: -295.929 [-484.637, -125.710] - loss: 295.917 - mae: 199.348 - mean_q: 231.060 Interval 2789 (1394000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.2383 4 episodes - episode_reward: -439.071 [-540.373, -244.137] - loss: 523.414 - mae: 206.224 - mean_q: 242.122 Interval 2790 (1394500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0480 1 episodes - episode_reward: -269.002 [-269.002, -269.002] - loss: 250.938 - mae: 212.986 - mean_q: 250.038 Interval 2791 (1395000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8705 4 episodes - episode_reward: -178.648 [-407.265, -47.966] - loss: 293.340 - mae: 227.836 - 
mean_q: 272.655 Interval 2792 (1395500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9918 5 episodes - episode_reward: -270.456 [-647.918, -98.782] - loss: 321.164 - mae: 238.712 - mean_q: 287.458 Interval 2793 (1396000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6437 1 episodes - episode_reward: -274.830 [-274.830, -274.830] - loss: 417.228 - mae: 255.339 - mean_q: 308.702 Interval 2794 (1396500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1006 3 episodes - episode_reward: -409.392 [-622.999, -140.491] - loss: 711.584 - mae: 266.610 - mean_q: 322.376 Interval 2795 (1397000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4555 3 episodes - episode_reward: -106.988 [-132.528, -92.498] - loss: 457.241 - mae: 272.348 - mean_q: 330.042 Interval 2796 (1397500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9319 3 episodes - episode_reward: -251.432 [-528.091, -102.773] - loss: 454.518 - mae: 279.379 - mean_q: 342.447 Interval 2797 (1398000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1306 4 episodes - episode_reward: -296.733 [-531.436, -108.104] - loss: 590.864 - mae: 285.012 - mean_q: 347.973 Interval 2798 (1398500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2372 1 episodes - episode_reward: -134.527 [-134.527, -134.527] - loss: 550.843 - mae: 290.735 - mean_q: 355.379 Interval 2799 (1399000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6484 1 episodes - episode_reward: -800.741 [-800.741, -800.741] - loss: 468.331 - mae: 293.759 - mean_q: 360.895 Interval 2800 (1399500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3101 2 episodes - episode_reward: -328.599 [-557.198, -100.000] - loss: 441.052 - mae: 299.374 - mean_q: 
374.641 Interval 2801 (1400000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.2912 1 episodes - episode_reward: -530.726 [-530.726, -530.726] - loss: 457.778 - mae: 299.740 - mean_q: 376.912 Interval 2802 (1400500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0459 2 episodes - episode_reward: -806.769 [-959.427, -654.111] - loss: 472.498 - mae: 308.212 - mean_q: 390.071 Interval 2803 (1401000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7569 2 episodes - episode_reward: -432.301 [-539.141, -325.461] - loss: 445.156 - mae: 314.525 - mean_q: 400.997 Interval 2804 (1401500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7500 1 episodes - episode_reward: -637.412 [-637.412, -637.412] - loss: 424.301 - mae: 317.041 - mean_q: 407.458 Interval 2805 (1402000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4462 4 episodes - episode_reward: -346.279 [-527.439, -112.504] - loss: 526.142 - mae: 310.660 - mean_q: 400.883 Interval 2806 (1402500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7729 1 episodes - episode_reward: -981.511 [-981.511, -981.511] - loss: 512.444 - mae: 307.855 - mean_q: 399.110 Interval 2807 (1403000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5634 2 episodes - episode_reward: -653.987 [-855.402, -452.572] - loss: 427.257 - mae: 304.954 - mean_q: 396.339 Interval 2808 (1403500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7735 1 episodes - episode_reward: -618.165 [-618.165, -618.165] - loss: 411.703 - mae: 302.523 - mean_q: 392.100 Interval 2809 (1404000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.1419 2 episodes - episode_reward: -889.318 [-1649.072, -129.564] - loss: 544.918 - mae: 297.683 - mean_q: 386.279 
Interval 2810 (1404500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0870 1 episodes - episode_reward: -486.857 [-486.857, -486.857] - loss: 468.173 - mae: 292.876 - mean_q: 378.539 Interval 2811 (1405000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2137 Interval 2812 (1405500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1856 Interval 2813 (1406000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.3571 Interval 2814 (1406500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -1.1411 3 episodes - episode_reward: -351.804 [-680.583, -173.569] - loss: 527.352 - mae: 330.229 - mean_q: 426.981 Interval 2815 (1407000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4948 2 episodes - episode_reward: -113.565 [-136.336, -90.793] - loss: 544.391 - mae: 349.781 - mean_q: 454.481 Interval 2816 (1407500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3622 5 episodes - episode_reward: -133.491 [-215.259, -45.966] - loss: 700.202 - mae: 372.888 - mean_q: 485.045 Interval 2817 (1408000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2862 1 episodes - episode_reward: -121.107 [-121.107, -121.107] - loss: 758.276 - mae: 399.747 - mean_q: 522.871 Interval 2818 (1408500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0541 1 episodes - episode_reward: -42.037 [-42.037, -42.037] - loss: 863.917 - mae: 432.568 - mean_q: 567.814 Interval 2819 (1409000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3459 Interval 2820 (1409500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2507 Interval 2821 (1410000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: 
-0.1349 Interval 2822 (1410500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.2617 Interval 2823 (1411000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5886 3 episodes - episode_reward: -426.945 [-675.186, -138.721] - loss: 809.260 - mae: 487.326 - mean_q: 635.855 Interval 2824 (1411500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9108 1 episodes - episode_reward: -289.690 [-289.690, -289.690] - loss: 784.651 - mae: 483.398 - mean_q: 630.751 Interval 2825 (1412000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4728 2 episodes - episode_reward: -460.662 [-467.815, -453.510] - loss: 743.463 - mae: 485.631 - mean_q: 636.109 Interval 2826 (1412500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7382 2 episodes - episode_reward: -253.062 [-353.605, -152.520] - loss: 779.220 - mae: 490.414 - mean_q: 642.898 Interval 2827 (1413000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5980 2 episodes - episode_reward: -444.579 [-631.593, -257.565] - loss: 720.210 - mae: 490.679 - mean_q: 644.165 Interval 2828 (1413500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3279 5 episodes - episode_reward: -301.897 [-450.673, -88.823] - loss: 755.924 - mae: 492.903 - mean_q: 647.778 Interval 2829 (1414000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4041 5 episodes - episode_reward: -229.348 [-554.942, -57.269] - loss: 828.969 - mae: 498.358 - mean_q: 657.679 Interval 2830 (1414500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0507 2 episodes - episode_reward: -404.098 [-412.458, -395.737] - loss: 845.355 - mae: 508.281 - mean_q: 670.458 Interval 2831 (1415000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
-2.0247 2 episodes - episode_reward: -445.389 [-473.159, -417.619] - loss: 796.762 - mae: 522.582 - mean_q: 688.984 Interval 2832 (1415500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4166 2 episodes - episode_reward: -456.956 [-465.256, -448.656] - loss: 933.379 - mae: 534.885 - mean_q: 709.544 Interval 2833 (1416000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0037 2 episodes - episode_reward: -270.065 [-340.007, -200.122] - loss: 959.113 - mae: 547.011 - mean_q: 727.282 Interval 2834 (1416500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7907 3 episodes - episode_reward: -496.919 [-585.790, -333.838] - loss: 988.114 - mae: 533.051 - mean_q: 705.074 Interval 2835 (1417000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1573 5 episodes - episode_reward: -191.175 [-331.344, -100.000] - loss: 830.664 - mae: 544.112 - mean_q: 718.874 Interval 2836 (1417500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5326 2 episodes - episode_reward: -697.627 [-918.875, -476.379] - loss: 825.430 - mae: 541.123 - mean_q: 713.262 Interval 2837 (1418000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8707 1 episodes - episode_reward: -830.046 [-830.046, -830.046] - loss: 906.250 - mae: 545.353 - mean_q: 719.351 Interval 2838 (1418500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3254 3 episodes - episode_reward: -430.485 [-642.275, -138.384] - loss: 877.961 - mae: 535.893 - mean_q: 704.975 Interval 2839 (1419000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1609 1 episodes - episode_reward: -401.384 [-401.384, -401.384] - loss: 961.533 - mae: 543.935 - mean_q: 716.294 Interval 2840 (1419500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0218 2 
episodes - episode_reward: -521.168 [-806.575, -235.761] - loss: 870.935 - mae: 540.908 - mean_q: 711.937 Interval 2841 (1420000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.7102 3 episodes - episode_reward: -642.284 [-775.167, -492.390] - loss: 865.398 - mae: 533.207 - mean_q: 701.632 Interval 2842 (1420500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.6432 2 episodes - episode_reward: -912.896 [-1098.475, -727.317] - loss: 868.296 - mae: 527.931 - mean_q: 693.032 Interval 2843 (1421000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.9737 3 episodes - episode_reward: -842.409 [-1569.173, -443.733] - loss: 1435.113 - mae: 535.063 - mean_q: 701.246 Interval 2844 (1421500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9403 5 episodes - episode_reward: -294.233 [-555.590, -102.008] - loss: 1453.647 - mae: 528.680 - mean_q: 693.171 Interval 2845 (1422000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6922 2 episodes - episode_reward: -341.368 [-462.813, -219.922] - loss: 671.713 - mae: 519.125 - mean_q: 681.949 Interval 2846 (1422500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3037 2 episodes - episode_reward: -391.454 [-430.436, -352.472] - loss: 758.808 - mae: 517.189 - mean_q: 680.088 Interval 2847 (1423000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8962 2 episodes - episode_reward: -912.990 [-1503.908, -322.072] - loss: 981.777 - mae: 513.095 - mean_q: 675.958 Interval 2848 (1423500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1994 4 episodes - episode_reward: -301.719 [-391.483, -148.742] - loss: 1399.009 - mae: 507.604 - mean_q: 669.856 Interval 2849 (1424000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0734 2 
episodes - episode_reward: -260.378 [-369.649, -151.107] - loss: 656.397 - mae: 503.759 - mean_q: 666.071 Interval 2850 (1424500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9764 3 episodes - episode_reward: -284.025 [-371.034, -186.502] - loss: 857.538 - mae: 497.819 - mean_q: 658.407 Interval 2851 (1425000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6706 1 episodes - episode_reward: -459.723 [-459.723, -459.723] - loss: 915.507 - mae: 492.483 - mean_q: 650.669 Interval 2852 (1425500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3772 3 episodes - episode_reward: -240.538 [-353.884, -100.000] - loss: 776.333 - mae: 490.983 - mean_q: 649.055 Interval 2853 (1426000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8527 4 episodes - episode_reward: -206.488 [-305.174, -130.013] - loss: 654.071 - mae: 482.491 - mean_q: 636.252 Interval 2854 (1426500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3242 1 episodes - episode_reward: -844.701 [-844.701, -844.701] - loss: 655.518 - mae: 473.335 - mean_q: 624.219 Interval 2855 (1427000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8980 Interval 2856 (1427500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6728 2 episodes - episode_reward: -335.550 [-571.099, -100.000] - loss: 684.148 - mae: 458.586 - mean_q: 603.894 Interval 2857 (1428000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5576 Interval 2858 (1428500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7472 1 episodes - episode_reward: -966.782 [-966.782, -966.782] - loss: 641.702 - mae: 439.078 - mean_q: 578.153 Interval 2859 (1429000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8958 2 
episodes - episode_reward: -357.091 [-544.593, -169.589] - loss: 580.932 - mae: 435.204 - mean_q: 573.646 Interval 2860 (1429500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3801 2 episodes - episode_reward: -339.525 [-455.727, -223.324] - loss: 585.889 - mae: 427.087 - mean_q: 561.839 Interval 2861 (1430000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0049 2 episodes - episode_reward: -253.337 [-298.531, -208.143] - loss: 540.584 - mae: 420.927 - mean_q: 553.374 Interval 2862 (1430500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0928 7 episodes - episode_reward: -220.656 [-353.359, -103.463] - loss: 491.087 - mae: 410.068 - mean_q: 538.044 Interval 2863 (1431000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2712 Interval 2864 (1431500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4081 Interval 2865 (1432000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2200 1 episodes - episode_reward: -481.798 [-481.798, -481.798] - loss: 650.864 - mae: 387.184 - mean_q: 505.929 Interval 2866 (1432500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3897 Interval 2867 (1433000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1149 Interval 2868 (1433500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.5816 2 episodes - episode_reward: -234.423 [-245.114, -223.732] - loss: 536.045 - mae: 372.245 - mean_q: 488.164 Interval 2869 (1434000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6607 2 episodes - episode_reward: -189.768 [-201.814, -177.723] - loss: 514.929 - mae: 368.730 - mean_q: 484.344 Interval 2870 (1434500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5648 2 
episodes - episode_reward: -126.165 [-132.686, -119.643] - loss: 570.393 - mae: 366.995 - mean_q: 482.869 Interval 2871 (1435000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4475 1 episodes - episode_reward: -183.711 [-183.711, -183.711] - loss: 473.478 - mae: 367.638 - mean_q: 483.511 Interval 2872 (1435500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2691 Interval 2873 (1436000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1704 Interval 2874 (1436500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2215 Interval 2875 (1437000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1808 Interval 2876 (1437500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1173 1 episodes - episode_reward: -903.379 [-903.379, -903.379] - loss: 581.252 - mae: 389.516 - mean_q: 513.731 Interval 2877 (1438000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0980 1 episodes - episode_reward: -602.298 [-602.298, -602.298] - loss: 582.546 - mae: 392.236 - mean_q: 518.947 Interval 2878 (1438500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5716 1 episodes - episode_reward: -490.966 [-490.966, -490.966] - loss: 549.337 - mae: 394.287 - mean_q: 521.724 Interval 2879 (1439000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4580 2 episodes - episode_reward: -429.771 [-555.804, -303.739] - loss: 553.168 - mae: 394.774 - mean_q: 521.328 Interval 2880 (1439500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.2620 2 episodes - episode_reward: -670.578 [-816.009, -525.147] - loss: 411.077 - mae: 393.884 - mean_q: 519.015 Interval 2881 (1440000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7296 
2 episodes - episode_reward: -449.465 [-495.901, -403.029] - loss: 444.081 - mae: 389.461 - mean_q: 513.704 Interval 2882 (1440500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3534 2 episodes - episode_reward: -377.628 [-448.457, -306.798] - loss: 447.094 - mae: 392.099 - mean_q: 516.015 Interval 2883 (1441000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1847 2 episodes - episode_reward: -254.916 [-292.905, -216.927] - loss: 512.162 - mae: 391.040 - mean_q: 514.271 Interval 2884 (1441500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6280 4 episodes - episode_reward: -202.719 [-316.707, -100.000] - loss: 512.041 - mae: 389.847 - mean_q: 514.573 Interval 2885 (1442000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5507 2 episodes - episode_reward: -136.951 [-176.618, -97.283] - loss: 449.053 - mae: 386.851 - mean_q: 510.636 Interval 2886 (1442500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5151 1 episodes - episode_reward: -102.701 [-102.701, -102.701] - loss: 499.282 - mae: 384.999 - mean_q: 507.865 Interval 2887 (1443000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8005 1 episodes - episode_reward: -383.665 [-383.665, -383.665] - loss: 438.534 - mae: 384.661 - mean_q: 507.759 Interval 2888 (1443500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.5199 4 episodes - episode_reward: -345.950 [-676.674, -36.761] - loss: 484.612 - mae: 382.348 - mean_q: 503.641 Interval 2889 (1444000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4468 Interval 2890 (1444500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1398 1 episodes - episode_reward: -645.592 [-645.592, -645.592] - loss: 457.668 - mae: 379.279 - mean_q: 499.730 Interval 
2891 (1445000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3135 3 episodes - episode_reward: -251.224 [-411.010, -108.128] - loss: 517.419 - mae: 373.664 - mean_q: 490.122 Interval 2892 (1445500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6780 Interval 2893 (1446000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8363 2 episodes - episode_reward: -436.353 [-772.705, -100.000] - loss: 490.462 - mae: 359.785 - mean_q: 469.966 Interval 2894 (1446500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9743 2 episodes - episode_reward: -481.618 [-840.326, -122.911] - loss: 412.587 - mae: 353.111 - mean_q: 462.111 Interval 2895 (1447000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8455 Interval 2896 (1447500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7931 1 episodes - episode_reward: -697.328 [-697.328, -697.328] - loss: 399.397 - mae: 333.256 - mean_q: 433.510 Interval 2897 (1448000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9694 2 episodes - episode_reward: -716.233 [-931.246, -501.220] - loss: 372.062 - mae: 324.869 - mean_q: 421.338 Interval 2898 (1448500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.2784 4 episodes - episode_reward: -433.763 [-553.788, -275.948] - loss: 340.713 - mae: 318.910 - mean_q: 413.232 Interval 2899 (1449000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4435 2 episodes - episode_reward: -291.032 [-461.726, -120.338] - loss: 364.802 - mae: 311.327 - mean_q: 402.743 Interval 2900 (1449500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5837 1 episodes - episode_reward: -874.941 [-874.941, -874.941] - loss: 393.668 - mae: 312.165 - mean_q: 403.526 Interval 2901 
(1450000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9166 6 episodes - episode_reward: -255.189 [-426.658, -130.763] - loss: 399.078 - mae: 309.038 - mean_q: 397.586 Interval 2902 (1450500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5356 1 episodes - episode_reward: -283.648 [-283.648, -283.648] - loss: 345.689 - mae: 309.112 - mean_q: 397.017 Interval 2903 (1451000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3560 Interval 2904 (1451500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.1263 1 episodes - episode_reward: -809.103 [-809.103, -809.103] - loss: 344.866 - mae: 310.281 - mean_q: 400.260 Interval 2905 (1452000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9913 Interval 2906 (1452500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6986 1 episodes - episode_reward: -838.909 [-838.909, -838.909] - loss: 455.926 - mae: 310.026 - mean_q: 398.380 Interval 2907 (1453000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6893 1 episodes - episode_reward: -403.534 [-403.534, -403.534] - loss: 426.700 - mae: 308.636 - mean_q: 395.792 Interval 2908 (1453500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3618 2 episodes - episode_reward: -787.476 [-1374.448, -200.504] - loss: 395.934 - mae: 312.220 - mean_q: 399.971 Interval 2909 (1454000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4837 2 episodes - episode_reward: -379.229 [-446.809, -311.649] - loss: 385.750 - mae: 305.801 - mean_q: 389.664 Interval 2910 (1454500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9172 1 episodes - episode_reward: -486.803 [-486.803, -486.803] - loss: 381.793 - mae: 306.142 - mean_q: 390.377 Interval 2911 
(1455000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.6545 4 episodes - episode_reward: -470.062 [-727.202, -306.919] - loss: 393.711 - mae: 306.747 - mean_q: 391.664 Interval 2912 (1455500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2773 2 episodes - episode_reward: -231.508 [-239.998, -223.017] - loss: 361.829 - mae: 309.926 - mean_q: 395.689 Interval 2913 (1456000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7626 2 episodes - episode_reward: -381.873 [-432.963, -330.783] - loss: 433.248 - mae: 311.892 - mean_q: 398.475 Interval 2914 (1456500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8719 2 episodes - episode_reward: -342.078 [-573.660, -110.497] - loss: 427.360 - mae: 323.252 - mean_q: 415.844 Interval 2915 (1457000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2708 5 episodes - episode_reward: -225.834 [-418.087, -64.425] - loss: 633.796 - mae: 336.364 - mean_q: 434.955 Interval 2916 (1457500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6230 3 episodes - episode_reward: -271.831 [-419.070, -151.006] - loss: 551.284 - mae: 361.957 - mean_q: 470.732 Interval 2917 (1458000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9963 3 episodes - episode_reward: -156.628 [-266.189, -92.437] - loss: 667.046 - mae: 392.934 - mean_q: 513.061 Interval 2918 (1458500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1923 2 episodes - episode_reward: -320.320 [-583.906, -56.734] - loss: 789.482 - mae: 426.987 - mean_q: 560.730 Interval 2919 (1459000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2046 3 episodes - episode_reward: -185.876 [-226.457, -108.599] - loss: 911.709 - mae: 459.136 - mean_q: 601.568 Interval 2920 (1459500 
steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8460 2 episodes - episode_reward: -145.610 [-212.616, -78.604] - loss: 804.366 - mae: 484.815 - mean_q: 637.647 Interval 2921 (1460000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9054 7 episodes - episode_reward: -234.038 [-434.600, -3.145] - loss: 798.997 - mae: 505.814 - mean_q: 667.574 Interval 2922 (1460500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1031 5 episodes - episode_reward: -203.703 [-394.505, -38.317] - loss: 793.879 - mae: 513.136 - mean_q: 676.497 Interval 2923 (1461000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8846 1 episodes - episode_reward: -451.660 [-451.660, -451.660] - loss: 873.568 - mae: 527.311 - mean_q: 695.681 Interval 2924 (1461500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3832 1 episodes - episode_reward: -131.782 [-131.782, -131.782] - loss: 737.383 - mae: 547.834 - mean_q: 722.397 Interval 2925 (1462000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1257 Interval 2926 (1462500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5942 1 episodes - episode_reward: -359.059 [-359.059, -359.059] - loss: 796.600 - mae: 565.841 - mean_q: 745.697 Interval 2927 (1463000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1407 Interval 2928 (1463500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2199 Interval 2929 (1464000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7665 1 episodes - episode_reward: -439.272 [-439.272, -439.272] - loss: 845.889 - mae: 595.330 - mean_q: 785.112 Interval 2930 (1464500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5036 3 episodes - 
episode_reward: -322.057 [-381.742, -241.543] - loss: 722.112 - mae: 594.870 - mean_q: 785.702 Interval 2931 (1465000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7652 1 episodes - episode_reward: -298.191 [-298.191, -298.191] - loss: 660.817 - mae: 591.187 - mean_q: 779.782 Interval 2932 (1465500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2663 Interval 2933 (1466000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6879 1 episodes - episode_reward: -418.692 [-418.692, -418.692] - loss: 750.987 - mae: 607.239 - mean_q: 800.262 Interval 2934 (1466500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3620 Interval 2935 (1467000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2587 Interval 2936 (1467500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1190 3 episodes - episode_reward: -281.688 [-576.284, -110.716] - loss: 684.546 - mae: 609.435 - mean_q: 804.461 Interval 2937 (1468000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1222 Interval 2938 (1468500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8953 Interval 2939 (1469000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.6058 2 episodes - episode_reward: -1204.963 [-2023.359, -386.568] - loss: 686.599 - mae: 596.777 - mean_q: 785.887 Interval 2940 (1469500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7594 5 episodes - episode_reward: -268.503 [-537.702, -100.000] - loss: 576.812 - mae: 584.837 - mean_q: 769.496 Interval 2941 (1470000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0176 3 episodes - episode_reward: -348.530 [-412.951, -308.418] - loss: 582.229 - mae: 576.048 - mean_q: 756.883 Interval 
2942 (1470500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8300 2 episodes - episode_reward: -388.177 [-568.842, -207.512] - loss: 608.781 - mae: 567.294 - mean_q: 746.207 Interval 2943 (1471000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3890 Interval 2944 (1471500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5009 2 episodes - episode_reward: -679.368 [-925.596, -433.141] - loss: 530.134 - mae: 552.580 - mean_q: 727.054 Interval 2945 (1472000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2279 4 episodes - episode_reward: -433.993 [-615.419, -161.431] - loss: 599.179 - mae: 546.288 - mean_q: 717.642 Interval 2946 (1472500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.5649 5 episodes - episode_reward: -371.151 [-588.206, -169.157] - loss: 571.420 - mae: 533.192 - mean_q: 699.927 Interval 2947 (1473000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9769 5 episodes - episode_reward: -298.827 [-451.141, -130.079] - loss: 528.120 - mae: 529.565 - mean_q: 694.041 Interval 2948 (1473500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.5797 5 episodes - episode_reward: -343.753 [-644.270, -216.411] - loss: 608.615 - mae: 523.485 - mean_q: 684.080 Interval 2949 (1474000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1979 4 episodes - episode_reward: -436.787 [-713.344, -92.535] - loss: 538.206 - mae: 523.516 - mean_q: 685.482 Interval 2950 (1474500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6328 3 episodes - episode_reward: -247.461 [-331.485, -140.661] - loss: 626.772 - mae: 527.728 - mean_q: 690.387 Interval 2951 (1475000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0511 3 episodes - 
episode_reward: -506.376 [-735.601, -217.766] - loss: 670.161 - mae: 537.395 - mean_q: 703.198 Interval 2952 (1475500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0827 Interval 2953 (1476000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8214 3 episodes - episode_reward: -443.481 [-723.687, -183.759] - loss: 552.929 - mae: 539.400 - mean_q: 706.078 Interval 2954 (1476500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1123 Interval 2955 (1477000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2222 Interval 2956 (1477500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4545 Interval 2957 (1478000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2457 1 episodes - episode_reward: -802.746 [-802.746, -802.746] - loss: 569.350 - mae: 558.235 - mean_q: 735.838 Interval 2958 (1478500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6353 2 episodes - episode_reward: -578.423 [-635.370, -521.476] - loss: 620.732 - mae: 557.890 - mean_q: 735.963 Interval 2959 (1479000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0433 1 episodes - episode_reward: -791.042 [-791.042, -791.042] - loss: 600.926 - mae: 559.898 - mean_q: 739.257 Interval 2960 (1479500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3933 3 episodes - episode_reward: -472.685 [-523.643, -393.639] - loss: 562.688 - mae: 556.910 - mean_q: 734.018 Interval 2961 (1480000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5099 1 episodes - episode_reward: -589.836 [-589.836, -589.836] - loss: 649.982 - mae: 562.483 - mean_q: 743.539 Interval 2962 (1480500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7574 3 episodes - 
episode_reward: -539.969 [-642.083, -469.656] - loss: 659.068 - mae: 560.969 - mean_q: 742.084 Interval 2963 (1481000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2612 1 episodes - episode_reward: -351.020 [-351.020, -351.020] - loss: 663.696 - mae: 564.927 - mean_q: 747.586 Interval 2964 (1481500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0481 1 episodes - episode_reward: -495.810 [-495.810, -495.810] - loss: 640.153 - mae: 565.886 - mean_q: 747.827 Interval 2965 (1482000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0006 1 episodes - episode_reward: -449.411 [-449.411, -449.411] - loss: 615.208 - mae: 561.300 - mean_q: 739.637 Interval 2966 (1482500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1285 5 episodes - episode_reward: -286.041 [-430.932, -50.066] - loss: 660.681 - mae: 556.482 - mean_q: 731.487 Interval 2967 (1483000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.2011 4 episodes - episode_reward: -379.976 [-580.002, -135.493] - loss: 658.333 - mae: 551.137 - mean_q: 723.182 Interval 2968 (1483500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3564 1 episodes - episode_reward: -632.795 [-632.795, -632.795] - loss: 649.048 - mae: 539.824 - mean_q: 706.412 Interval 2969 (1484000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0882 2 episodes - episode_reward: -322.420 [-373.426, -271.415] - loss: 583.732 - mae: 529.230 - mean_q: 692.437 Interval 2970 (1484500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2386 2 episodes - episode_reward: -252.382 [-269.721, -235.043] - loss: 545.589 - mae: 527.143 - mean_q: 690.882 Interval 2971 (1485000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.8608 1 episodes - 
episode_reward: -1559.135 [-1559.135, -1559.135] - loss: 501.267 - mae: 510.243 - mean_q: 665.289 Interval 2972 (1485500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9588 4 episodes - episode_reward: -194.927 [-372.898, -94.792] - loss: 579.891 - mae: 501.772 - mean_q: 652.585 Interval 2973 (1486000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4451 3 episodes - episode_reward: -282.453 [-540.793, -57.858] - loss: 538.587 - mae: 490.382 - mean_q: 637.582 Interval 2974 (1486500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9397 3 episodes - episode_reward: -287.694 [-314.875, -240.131] - loss: 526.714 - mae: 485.487 - mean_q: 629.330 Interval 2975 (1487000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7008 Interval 2976 (1487500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5786 1 episodes - episode_reward: -1205.418 [-1205.418, -1205.418] - loss: 576.248 - mae: 467.285 - mean_q: 601.859 Interval 2977 (1488000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4445 1 episodes - episode_reward: -677.749 [-677.749, -677.749] - loss: 502.778 - mae: 455.891 - mean_q: 585.358 Interval 2978 (1488500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9169 1 episodes - episode_reward: -446.969 [-446.969, -446.969] - loss: 565.206 - mae: 446.953 - mean_q: 572.831 Interval 2979 (1489000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.9867 2 episodes - episode_reward: -569.558 [-886.927, -252.188] - loss: 470.770 - mae: 436.433 - mean_q: 559.351 Interval 2980 (1489500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5565 Interval 2981 (1490000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6836 Interval 2982 
(1490500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.3277 4 episodes - episode_reward: -387.287 [-662.753, -100.000] - loss: 453.854 - mae: 401.093 - mean_q: 513.050 Interval 2983 (1491000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8033 1 episodes - episode_reward: -356.838 [-356.838, -356.838] - loss: 430.930 - mae: 391.777 - mean_q: 497.674 Interval 2984 (1491500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9871 1 episodes - episode_reward: -724.124 [-724.124, -724.124] - loss: 450.324 - mae: 381.480 - mean_q: 482.654 Interval 2985 (1492000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7281 Interval 2986 (1492500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8550 1 episodes - episode_reward: -797.100 [-797.100, -797.100] - loss: 347.946 - mae: 365.443 - mean_q: 458.686 Interval 2987 (1493000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8314 3 episodes - episode_reward: -424.398 [-907.708, -175.849] - loss: 346.400 - mae: 356.742 - mean_q: 449.493 Interval 2988 (1493500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1352 Interval 2989 (1494000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0676 3 episodes - episode_reward: -566.376 [-1187.983, -247.083] - loss: 403.406 - mae: 339.381 - mean_q: 424.894 Interval 2990 (1494500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6793 1 episodes - episode_reward: -361.225 [-361.225, -361.225] - loss: 321.692 - mae: 334.851 - mean_q: 420.197 Interval 2991 (1495000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.4201 4 episodes - episode_reward: -325.983 [-776.692, -149.216] - loss: 341.478 - mae: 324.363 - mean_q: 405.558 Interval 2992 
(1495500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6813 Interval 2993 (1496000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0197 1 episodes - episode_reward: -636.440 [-636.440, -636.440] - loss: 340.517 - mae: 313.551 - mean_q: 390.264 Interval 2994 (1496500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2089 Interval 2995 (1497000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1188 2 episodes - episode_reward: -346.441 [-593.223, -99.660] - loss: 328.782 - mae: 297.495 - mean_q: 371.776 Interval 2996 (1497500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6587 1 episodes - episode_reward: -407.995 [-407.995, -407.995] - loss: 313.996 - mae: 289.573 - mean_q: 359.164 Interval 2997 (1498000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.2796 5 episodes - episode_reward: -451.065 [-968.095, -144.055] - loss: 282.486 - mae: 282.361 - mean_q: 349.523 Interval 2998 (1498500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9603 2 episodes - episode_reward: -409.662 [-605.945, -213.379] - loss: 302.082 - mae: 279.035 - mean_q: 345.609 Interval 2999 (1499000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0826 1 episodes - episode_reward: -848.030 [-848.030, -848.030] - loss: 283.846 - mae: 271.587 - mean_q: 336.517 Interval 3000 (1499500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5274 1 episodes - episode_reward: -663.219 [-663.219, -663.219] - loss: 324.258 - mae: 263.537 - mean_q: 326.182 Interval 3001 (1500000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.7560 3 episodes - episode_reward: -609.330 [-1066.038, -105.838] - loss: 280.108 - mae: 258.920 - mean_q: 319.468 Interval 3002 
(1500500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4132 Interval 3003 (1501000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.8886 Interval 3004 (1501500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9164 4 episodes - episode_reward: -399.876 [-571.040, -117.871] - loss: 248.014 - mae: 241.925 - mean_q: 294.911 Interval 3005 (1502000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3668 1 episodes - episode_reward: -605.571 [-605.571, -605.571] - loss: 247.366 - mae: 233.482 - mean_q: 282.711 Interval 3006 (1502500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6424 1 episodes - episode_reward: -662.806 [-662.806, -662.806] - loss: 240.546 - mae: 226.399 - mean_q: 271.245 Interval 3007 (1503000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4310 2 episodes - episode_reward: -415.861 [-449.534, -382.187] - loss: 216.440 - mae: 220.428 - mean_q: 264.133 Interval 3008 (1503500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8922 3 episodes - episode_reward: -201.785 [-314.563, -82.455] - loss: 226.937 - mae: 214.274 - mean_q: 256.115 Interval 3009 (1504000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8869 2 episodes - episode_reward: -425.612 [-437.360, -413.863] - loss: 266.356 - mae: 210.657 - mean_q: 250.624 Interval 3010 (1504500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0886 2 episodes - episode_reward: -385.984 [-606.192, -165.776] - loss: 233.060 - mae: 204.379 - mean_q: 240.719 Interval 3011 (1505000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8612 2 episodes - episode_reward: -367.140 [-427.222, -307.058] - loss: 216.431 - mae: 200.551 - mean_q: 235.983 Interval 3012 
(1505500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8947 2 episodes - episode_reward: -351.204 [-422.762, -279.645] - loss: 184.667 - mae: 197.981 - mean_q: 233.846 Interval 3013 (1506000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5287 2 episodes - episode_reward: -388.291 [-399.342, -377.240] - loss: 177.007 - mae: 194.650 - mean_q: 229.997 Interval 3014 (1506500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1968 3 episodes - episode_reward: -293.227 [-414.434, -100.000] - loss: 176.433 - mae: 190.749 - mean_q: 224.402 Interval 3015 (1507000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9609 2 episodes - episode_reward: -121.826 [-146.782, -96.870] - loss: 181.372 - mae: 187.851 - mean_q: 220.816 Interval 3016 (1507500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4577 3 episodes - episode_reward: -427.071 [-491.531, -386.398] - loss: 194.465 - mae: 184.015 - mean_q: 216.284 Interval 3017 (1508000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3113 4 episodes - episode_reward: -212.834 [-310.539, -126.813] - loss: 191.037 - mae: 182.162 - mean_q: 214.025 Interval 3018 (1508500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9901 4 episodes - episode_reward: -234.757 [-343.770, -158.593] - loss: 172.032 - mae: 181.532 - mean_q: 213.505 Interval 3019 (1509000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.8959 6 episodes - episode_reward: -240.905 [-383.024, -107.760] - loss: 152.355 - mae: 179.806 - mean_q: 208.873 Interval 3020 (1509500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9964 4 episodes - episode_reward: -265.032 [-318.888, -178.568] - loss: 192.588 - mae: 179.783 - mean_q: 208.526 Interval 3021 (1510000 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8810 5 episodes - episode_reward: -263.263 [-425.692, -62.667] - loss: 217.882 - mae: 181.579 - mean_q: 210.822 Interval 3022 (1510500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.8087 6 episodes - episode_reward: -213.076 [-397.640, -114.114] - loss: 201.977 - mae: 182.851 - mean_q: 213.379 Interval 3023 (1511000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5895 4 episodes - episode_reward: -373.364 [-613.914, -193.649] - loss: 233.132 - mae: 184.283 - mean_q: 216.051 Interval 3024 (1511500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2391 2 episodes - episode_reward: -318.377 [-422.590, -214.164] - loss: 237.712 - mae: 187.222 - mean_q: 218.613 Interval 3025 (1512000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8336 6 episodes - episode_reward: -233.622 [-407.907, -107.532] - loss: 233.432 - mae: 188.401 - mean_q: 218.264 Interval 3026 (1512500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0132 2 episodes - episode_reward: -282.121 [-359.484, -204.757] - loss: 271.719 - mae: 194.118 - mean_q: 225.407 Interval 3027 (1513000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1322 2 episodes - episode_reward: 14.128 [-148.577, 176.833] - loss: 358.628 - mae: 197.546 - mean_q: 228.430 Interval 3028 (1513500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0274 3 episodes - episode_reward: -307.818 [-524.301, -60.315] - loss: 295.301 - mae: 200.883 - mean_q: 232.053 Interval 3029 (1514000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.1781 5 episodes - episode_reward: -436.013 [-949.035, -142.388] - loss: 362.966 - mae: 209.482 - mean_q: 246.192 Interval 3030 (1514500 steps 
performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9411 7 episodes - episode_reward: -143.835 [-254.003, -85.406] - loss: 374.167 - mae: 212.245 - mean_q: 252.112 Interval 3031 (1515000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0806 3 episodes - episode_reward: -173.635 [-201.634, -133.306] - loss: 445.931 - mae: 219.349 - mean_q: 262.977 Interval 3032 (1515500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7160 Interval 3033 (1516000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.4468 3 episodes - episode_reward: -855.385 [-1256.409, -463.702] - loss: 414.417 - mae: 238.664 - mean_q: 286.705 Interval 3034 (1516500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2638 4 episodes - episode_reward: -277.212 [-517.368, -139.336] - loss: 516.040 - mae: 243.788 - mean_q: 293.484 Interval 3035 (1517000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1036 1 episodes - episode_reward: -242.443 [-242.443, -242.443] - loss: 488.928 - mae: 253.470 - mean_q: 307.905 Interval 3036 (1517500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4059 2 episodes - episode_reward: -811.673 [-1030.217, -593.128] - loss: 497.396 - mae: 252.850 - mean_q: 305.847 Interval 3037 (1518000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9741 1 episodes - episode_reward: 1.954 [1.954, 1.954] - loss: 508.324 - mae: 263.275 - mean_q: 321.101 Interval 3038 (1518500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7961 1 episodes - episode_reward: -614.559 [-614.559, -614.559] - loss: 489.847 - mae: 271.279 - mean_q: 330.755 Interval 3039 (1519000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1223 1 episodes - episode_reward: -570.589 
[-570.589, -570.589] - loss: 501.658 - mae: 270.867 - mean_q: 330.179 Interval 3040 (1519500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7238 2 episodes - episode_reward: -449.185 [-721.397, -176.974] - loss: 500.548 - mae: 277.982 - mean_q: 342.073 Interval 3041 (1520000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2653 1 episodes - episode_reward: -701.350 [-701.350, -701.350] - loss: 498.609 - mae: 283.559 - mean_q: 348.272 Interval 3042 (1520500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3841 Interval 3043 (1521000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9369 1 episodes - episode_reward: -607.978 [-607.978, -607.978] - loss: 615.240 - mae: 290.132 - mean_q: 360.475 Interval 3044 (1521500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5352 1 episodes - episode_reward: -896.632 [-896.632, -896.632] - loss: 659.850 - mae: 307.411 - mean_q: 384.638 Interval 3045 (1522000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0373 7 episodes - episode_reward: -151.830 [-297.174, -93.649] - loss: 636.479 - mae: 311.727 - mean_q: 391.411 Interval 3046 (1522500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1024 4 episodes - episode_reward: -141.657 [-259.414, -87.400] - loss: 809.346 - mae: 316.131 - mean_q: 399.160 Interval 3047 (1523000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9810 6 episodes - episode_reward: -137.938 [-248.183, -100.000] - loss: 784.524 - mae: 326.913 - mean_q: 414.301 Interval 3048 (1523500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3412 3 episodes - episode_reward: -238.654 [-304.243, -186.899] - loss: 749.079 - mae: 339.829 - mean_q: 431.829 Interval 3049 (1524000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -2.7525 3 episodes - episode_reward: -436.248 [-543.368, -299.894] - loss: 750.696 - mae: 353.002 - mean_q: 449.975 Interval 3050 (1524500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9440 4 episodes - episode_reward: -280.488 [-637.767, -25.312] - loss: 966.243 - mae: 372.006 - mean_q: 472.817 Interval 3051 (1525000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0402 4 episodes - episode_reward: -238.577 [-658.987, -41.281] - loss: 995.548 - mae: 378.334 - mean_q: 482.203 Interval 3052 (1525500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4038 6 episodes - episode_reward: -123.930 [-234.709, -73.918] - loss: 969.751 - mae: 382.516 - mean_q: 486.851 Interval 3053 (1526000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9669 4 episodes - episode_reward: -257.177 [-487.265, -100.000] - loss: 947.200 - mae: 390.256 - mean_q: 497.327 Interval 3054 (1526500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0008 4 episodes - episode_reward: -136.041 [-192.533, -103.162] - loss: 1184.201 - mae: 383.972 - mean_q: 489.367 Interval 3055 (1527000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6151 3 episodes - episode_reward: -256.723 [-400.859, -71.180] - loss: 1030.499 - mae: 381.634 - mean_q: 488.816 Interval 3056 (1527500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6079 3 episodes - episode_reward: -376.340 [-734.734, -84.346] - loss: 1055.321 - mae: 395.240 - mean_q: 506.801 Interval 3057 (1528000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8931 2 episodes - episode_reward: -526.417 [-748.460, -304.375] - loss: 1337.400 - mae: 401.326 - mean_q: 516.498 Interval 3058 (1528500 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -2.5608 1 episodes - episode_reward: -1162.836 [-1162.836, -1162.836] - loss: 1430.428 - mae: 404.754 - mean_q: 520.607 Interval 3059 (1529000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2982 3 episodes - episode_reward: -286.758 [-621.138, -106.723] - loss: 1121.371 - mae: 407.784 - mean_q: 524.854 Interval 3060 (1529500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5765 Interval 3061 (1530000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8312 2 episodes - episode_reward: -554.035 [-1010.574, -97.497] - loss: 1193.642 - mae: 418.830 - mean_q: 543.254 Interval 3062 (1530500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2035 4 episodes - episode_reward: -293.962 [-534.451, -140.669] - loss: 1739.040 - mae: 419.650 - mean_q: 542.816 Interval 3063 (1531000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8291 1 episodes - episode_reward: -167.347 [-167.347, -167.347] - loss: 1258.300 - mae: 424.056 - mean_q: 550.755 Interval 3064 (1531500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4749 2 episodes - episode_reward: -722.519 [-858.091, -586.947] - loss: 1273.996 - mae: 433.486 - mean_q: 563.843 Interval 3065 (1532000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1629 3 episodes - episode_reward: -368.730 [-494.391, -252.250] - loss: 1358.894 - mae: 434.755 - mean_q: 564.747 Interval 3066 (1532500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6462 1 episodes - episode_reward: -615.419 [-615.419, -615.419] - loss: 1328.042 - mae: 433.660 - mean_q: 564.311 Interval 3067 (1533000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4881 3 episodes - episode_reward: -394.401 
[-687.132, -51.481] - loss: 1324.483 - mae: 441.098 - mean_q: 575.089 Interval 3068 (1533500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9483 2 episodes - episode_reward: -513.756 [-917.256, -110.256] - loss: 1313.339 - mae: 445.983 - mean_q: 582.248 Interval 3069 (1534000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.3270 5 episodes - episode_reward: -356.132 [-552.573, -141.853] - loss: 1930.279 - mae: 456.458 - mean_q: 595.421 Interval 3070 (1534500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9634 3 episodes - episode_reward: -376.207 [-541.481, -250.321] - loss: 1172.719 - mae: 448.598 - mean_q: 585.227 Interval 3071 (1535000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.5919 4 episodes - episode_reward: -528.574 [-856.172, -246.454] - loss: 1558.204 - mae: 459.050 - mean_q: 599.792 Interval 3072 (1535500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1534 3 episodes - episode_reward: -421.609 [-723.093, -124.996] - loss: 1138.541 - mae: 458.850 - mean_q: 599.364 Interval 3073 (1536000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.5479 2 episodes - episode_reward: -638.064 [-1167.249, -108.880] - loss: 1149.203 - mae: 477.384 - mean_q: 623.046 Interval 3074 (1536500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0124 2 episodes - episode_reward: -448.340 [-764.922, -131.757] - loss: 1359.194 - mae: 479.329 - mean_q: 625.488 Interval 3075 (1537000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7832 1 episodes - episode_reward: -657.052 [-657.052, -657.052] - loss: 1142.740 - mae: 461.303 - mean_q: 602.192 Interval 3076 (1537500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9663 3 episodes - episode_reward: -571.954 
[-1357.290, -128.342] - loss: 2781.434 - mae: 450.936 - mean_q: 588.044 Interval 3077 (1538000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.2495 3 episodes - episode_reward: -443.775 [-716.737, -294.725] - loss: 2063.876 - mae: 463.947 - mean_q: 605.236 Interval 3078 (1538500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1801 3 episodes - episode_reward: -334.373 [-455.783, -129.255] - loss: 3823.524 - mae: 466.100 - mean_q: 607.595 Interval 3079 (1539000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8092 4 episodes - episode_reward: -304.676 [-768.363, -88.783] - loss: 1045.817 - mae: 472.785 - mean_q: 619.149 Interval 3080 (1539500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9595 2 episodes - episode_reward: -803.743 [-1200.702, -406.783] - loss: 1219.929 - mae: 479.110 - mean_q: 627.742 Interval 3081 (1540000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1452 4 episodes - episode_reward: -209.156 [-248.451, -162.518] - loss: 2931.965 - mae: 479.181 - mean_q: 625.717 Interval 3082 (1540500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.3081 4 episodes - episode_reward: -457.751 [-1300.209, -124.685] - loss: 1458.727 - mae: 505.129 - mean_q: 663.054 Interval 3083 (1541000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0855 4 episodes - episode_reward: -322.769 [-669.897, -108.190] - loss: 2532.611 - mae: 511.123 - mean_q: 670.966 Interval 3084 (1541500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4366 3 episodes - episode_reward: -432.103 [-595.256, -124.075] - loss: 3018.188 - mae: 500.635 - mean_q: 657.209 Interval 3085 (1542000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.6658 4 episodes - episode_reward: -381.516 
[-398.610, -355.334] - loss: 2058.872 - mae: 511.672 - mean_q: 673.518 Interval 3086 (1542500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7447 2 episodes - episode_reward: -456.870 [-523.216, -390.524] - loss: 1979.541 - mae: 522.792 - mean_q: 689.394 Interval 3087 (1543000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8022 3 episodes - episode_reward: -252.840 [-352.960, -122.039] - loss: 1974.771 - mae: 515.333 - mean_q: 677.871 Interval 3088 (1543500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.7669 4 episodes - episode_reward: -375.449 [-527.556, -150.794] - loss: 2764.296 - mae: 521.508 - mean_q: 685.217 Interval 3089 (1544000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7845 2 episodes - episode_reward: -459.711 [-763.726, -155.696] - loss: 2293.494 - mae: 507.706 - mean_q: 669.292 Interval 3090 (1544500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9880 1 episodes - episode_reward: -331.398 [-331.398, -331.398] - loss: 2969.727 - mae: 529.059 - mean_q: 697.159 Interval 3091 (1545000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2316 2 episodes - episode_reward: -622.349 [-846.715, -397.984] - loss: 2302.390 - mae: 530.411 - mean_q: 700.462 Interval 3092 (1545500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4388 2 episodes - episode_reward: -379.248 [-560.735, -197.760] - loss: 2773.607 - mae: 554.093 - mean_q: 737.053 Interval 3093 (1546000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9668 2 episodes - episode_reward: -371.474 [-642.948, -100.000] - loss: 2315.191 - mae: 563.818 - mean_q: 752.074 Interval 3094 (1546500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3046 3 episodes - episode_reward: -412.205 
[-433.762, -388.916] - loss: 3036.838 - mae: 575.510 - mean_q: 767.622 Interval 3095 (1547000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2218 4 episodes - episode_reward: -304.740 [-436.007, -197.474] - loss: 3051.115 - mae: 595.868 - mean_q: 795.300 Interval 3096 (1547500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6157 3 episodes - episode_reward: -270.688 [-367.150, -122.700] - loss: 2890.540 - mae: 603.011 - mean_q: 807.376 Interval 3097 (1548000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8013 3 episodes - episode_reward: -289.357 [-444.264, -202.781] - loss: 2814.845 - mae: 615.629 - mean_q: 827.902 Interval 3098 (1548500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5225 1 episodes - episode_reward: -369.980 [-369.980, -369.980] - loss: 4337.540 - mae: 635.379 - mean_q: 855.642 Interval 3099 (1549000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7932 2 episodes - episode_reward: -598.276 [-619.731, -576.822] - loss: 2638.693 - mae: 664.659 - mean_q: 899.195 Interval 3100 (1549500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.2468 1 episodes - episode_reward: -383.583 [-383.583, -383.583] - loss: 2773.835 - mae: 688.041 - mean_q: 929.219 Interval 3101 (1550000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3272 1 episodes - episode_reward: -1675.276 [-1675.276, -1675.276] - loss: 3541.819 - mae: 727.849 - mean_q: 983.683 Interval 3102 (1550500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4599 Interval 3103 (1551000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0103 1 episodes - episode_reward: -1824.766 [-1824.766, -1824.766] - loss: 3710.063 - mae: 821.268 - mean_q: 1109.798 Interval 3104 (1551500 steps 
performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8002 1 episodes - episode_reward: -779.561 [-779.561, -779.561] - loss: 3202.852 - mae: 850.972 - mean_q: 1148.905 Interval 3105 (1552000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1249 1 episodes - episode_reward: -738.056 [-738.056, -738.056] - loss: 3441.833 - mae: 886.591 - mean_q: 1198.644 Interval 3106 (1552500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.5427 Interval 3107 (1553000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.2498 1 episodes - episode_reward: -4041.720 [-4041.720, -4041.720] - loss: 2675.862 - mae: 974.527 - mean_q: 1324.815 Interval 3108 (1553500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1893 Interval 3109 (1554000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6473 1 episodes - episode_reward: -801.743 [-801.743, -801.743] - loss: 3538.282 - mae: 1055.457 - mean_q: 1431.655 Interval 3110 (1554500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.4413 Interval 3111 (1555000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9355 1 episodes - episode_reward: -2689.293 [-2689.293, -2689.293] - loss: 7372.118 - mae: 1159.344 - mean_q: 1567.822 Interval 3112 (1555500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6442 Interval 3113 (1556000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.4645 1 episodes - episode_reward: -2018.997 [-2018.997, -2018.997] - loss: 3393.218 - mae: 1219.317 - mean_q: 1653.305 Interval 3114 (1556500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6941 Interval 3115 (1557000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.6250 
2 episodes - episode_reward: -1134.407 [-2107.479, -161.334] - loss: 4064.757 - mae: 1312.781 - mean_q: 1782.788 Interval 3116 (1557500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8166 2 episodes - episode_reward: -332.598 [-370.017, -295.179] - loss: 4565.706 - mae: 1323.785 - mean_q: 1799.447 Interval 3117 (1558000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.1967 Interval 3118 (1558500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.5184 1 episodes - episode_reward: -2219.584 [-2219.584, -2219.584] - loss: 4820.447 - mae: 1392.068 - mean_q: 1891.247 Interval 3119 (1559000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9586 Interval 3120 (1559500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.9784 2 episodes - episode_reward: -2158.070 [-4139.801, -176.338] - loss: 6537.378 - mae: 1446.543 - mean_q: 1962.032 Interval 3121 (1560000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3935 1 episodes - episode_reward: -392.656 [-392.656, -392.656] - loss: 6107.157 - mae: 1460.394 - mean_q: 1979.541 Interval 3122 (1560500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.8565 Interval 3123 (1561000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.6800 2 episodes - episode_reward: -2833.033 [-4870.134, -795.933] - loss: 10350.096 - mae: 1461.662 - mean_q: 1979.564 Interval 3124 (1561500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9119 Interval 3125 (1562000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.5170 1 episodes - episode_reward: -1438.765 [-1438.765, -1438.765] - loss: 5053.747 - mae: 1463.174 - mean_q: 1981.413 Interval 3126 (1562500 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -7.1887 1 episodes - episode_reward: -4374.588 [-4374.588, -4374.588] - loss: 4084.069 - mae: 1466.900 - mean_q: 1986.246 Interval 3127 (1563000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9595 1 episodes - episode_reward: -879.405 [-879.405, -879.405] - loss: 4771.040 - mae: 1450.004 - mean_q: 1963.948 Interval 3128 (1563500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.0248 Interval 3129 (1564000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.8255 4 episodes - episode_reward: -1277.467 [-4247.604, -109.537] - loss: 6074.718 - mae: 1447.024 - mean_q: 1961.732 Interval 3130 (1564500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1150 Interval 3131 (1565000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -7.3273 1 episodes - episode_reward: -4074.285 [-4074.285, -4074.285] - loss: 6613.201 - mae: 1462.969 - mean_q: 1986.298 Interval 3132 (1565500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9215 1 episodes - episode_reward: -901.056 [-901.056, -901.056] - loss: 7039.661 - mae: 1450.193 - mean_q: 1969.614 Interval 3133 (1566000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1824 1 episodes - episode_reward: -1015.371 [-1015.371, -1015.371] - loss: 7112.904 - mae: 1471.737 - mean_q: 2000.465 Interval 3134 (1566500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.2068 1 episodes - episode_reward: -2270.495 [-2270.495, -2270.495] - loss: 4928.021 - mae: 1487.879 - mean_q: 2027.140 Interval 3135 (1567000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6349 1 episodes - episode_reward: -634.676 [-634.676, -634.676] - loss: 7570.638 - mae: 1512.471 - mean_q: 2057.586 Interval 3136 
(1567500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7079 1 episodes - episode_reward: -1015.245 [-1015.245, -1015.245] - loss: 6265.121 - mae: 1521.932 - mean_q: 2071.090 Interval 3137 (1568000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.4810 Interval 3138 (1568500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1041 3 episodes - episode_reward: -972.884 [-1580.857, -100.000] - loss: 5914.965 - mae: 1542.861 - mean_q: 2099.014 Interval 3139 (1569000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.5448 1 episodes - episode_reward: -2175.620 [-2175.620, -2175.620] - loss: 10137.244 - mae: 1544.088 - mean_q: 2095.225 Interval 3140 (1569500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.7710 2 episodes - episode_reward: -740.728 [-1187.190, -294.266] - loss: 8044.446 - mae: 1552.903 - mean_q: 2106.456 Interval 3141 (1570000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1797 Interval 3142 (1570500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9503 2 episodes - episode_reward: -987.323 [-1595.774, -378.872] - loss: 6899.261 - mae: 1548.416 - mean_q: 2100.254 Interval 3143 (1571000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.9365 3 episodes - episode_reward: -650.305 [-1262.908, -262.414] - loss: 8108.519 - mae: 1546.730 - mean_q: 2094.094 Interval 3144 (1571500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5418 Interval 3145 (1572000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3199 2 episodes - episode_reward: -808.828 [-1207.043, -410.614] - loss: 6509.166 - mae: 1531.051 - mean_q: 2071.323 Interval 3146 (1572500 steps performed) 500/500 [==============================] - 4s 7ms/step 
- reward: -1.5532 1 episodes - episode_reward: -1025.566 [-1025.566, -1025.566] - loss: 7160.023 - mae: 1507.854 - mean_q: 2036.000 Interval 3147 (1573000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9336 Interval 3148 (1573500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8069 2 episodes - episode_reward: -1027.792 [-1084.327, -971.258] - loss: 5511.401 - mae: 1486.472 - mean_q: 2005.643 Interval 3149 (1574000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.7879 2 episodes - episode_reward: -726.462 [-748.821, -704.102] - loss: 5598.107 - mae: 1470.550 - mean_q: 1983.170 Interval 3150 (1574500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2993 1 episodes - episode_reward: -1912.795 [-1912.795, -1912.795] - loss: 7069.453 - mae: 1460.053 - mean_q: 1967.533 Interval 3151 (1575000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.7701 4 episodes - episode_reward: -355.719 [-491.709, -150.420] - loss: 7920.928 - mae: 1418.771 - mean_q: 1910.287 Interval 3152 (1575500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2941 Interval 3153 (1576000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0219 Interval 3154 (1576500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.4360 Interval 3155 (1577000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.1391 2 episodes - episode_reward: -734.913 [-937.406, -532.419] - loss: 6738.609 - mae: 1368.142 - mean_q: 1845.861 Interval 3156 (1577500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4883 2 episodes - episode_reward: -83.317 [-114.051, -52.583] - loss: 4914.742 - mae: 1344.960 - mean_q: 1815.790 Interval 3157 (1578000 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -1.9871 1 episodes - episode_reward: -958.812 [-958.812, -958.812] - loss: 4576.389 - mae: 1345.935 - mean_q: 1821.333 Interval 3158 (1578500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.7386 2 episodes - episode_reward: -759.409 [-1279.118, -239.701] - loss: 4195.457 - mae: 1343.220 - mean_q: 1816.568 Interval 3159 (1579000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6807 Interval 3160 (1579500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1918 1 episodes - episode_reward: -469.688 [-469.688, -469.688] - loss: 3617.837 - mae: 1334.111 - mean_q: 1802.003 Interval 3161 (1580000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4263 1 episodes - episode_reward: -997.322 [-997.322, -997.322] - loss: 4690.396 - mae: 1325.474 - mean_q: 1789.993 Interval 3162 (1580500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6903 1 episodes - episode_reward: -888.233 [-888.233, -888.233] - loss: 4315.834 - mae: 1314.576 - mean_q: 1774.798 Interval 3163 (1581000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6845 Interval 3164 (1581500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.1965 3 episodes - episode_reward: -984.996 [-1868.001, -151.586] - loss: 4406.300 - mae: 1301.141 - mean_q: 1756.746 Interval 3165 (1582000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1459 2 episodes - episode_reward: -800.995 [-958.031, -643.959] - loss: 3698.412 - mae: 1282.144 - mean_q: 1728.356 Interval 3166 (1582500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3770 1 episodes - episode_reward: -692.245 [-692.245, -692.245] - loss: 3488.629 - mae: 1247.616 - mean_q: 1681.029 Interval 3167 (1583000 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7850 1 episodes - episode_reward: -1276.159 [-1276.159, -1276.159] - loss: 3930.098 - mae: 1237.969 - mean_q: 1666.344 Interval 3168 (1583500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7706 1 episodes - episode_reward: -730.586 [-730.586, -730.586] - loss: 3918.111 - mae: 1221.472 - mean_q: 1642.765 Interval 3169 (1584000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7695 1 episodes - episode_reward: -1259.744 [-1259.744, -1259.744] - loss: 3614.472 - mae: 1223.635 - mean_q: 1645.428 Interval 3170 (1584500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1173 3 episodes - episode_reward: -524.196 [-879.927, -255.885] - loss: 3089.208 - mae: 1215.871 - mean_q: 1634.349 Interval 3171 (1585000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7065 1 episodes - episode_reward: -612.519 [-612.519, -612.519] - loss: 4009.938 - mae: 1193.042 - mean_q: 1603.504 Interval 3172 (1585500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8094 3 episodes - episode_reward: -405.849 [-486.892, -329.927] - loss: 3397.498 - mae: 1201.970 - mean_q: 1615.754 Interval 3173 (1586000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9147 1 episodes - episode_reward: -182.927 [-182.927, -182.927] - loss: 2798.081 - mae: 1205.488 - mean_q: 1620.387 Interval 3174 (1586500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7338 5 episodes - episode_reward: -216.527 [-452.107, -41.305] - loss: 3258.515 - mae: 1202.873 - mean_q: 1615.083 Interval 3175 (1587000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3502 1 episodes - episode_reward: -250.285 [-250.285, -250.285] - loss: 3362.314 - mae: 1193.448 - mean_q: 1602.628 
Interval 3176 (1587500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6988 2 episodes - episode_reward: -112.153 [-239.582, 15.276] - loss: 4500.645 - mae: 1203.333 - mean_q: 1617.207 Interval 3177 (1588000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4447 Interval 3178 (1588500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5482 1 episodes - episode_reward: -662.031 [-662.031, -662.031] - loss: 3504.612 - mae: 1224.493 - mean_q: 1646.012 Interval 3179 (1589000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1256 1 episodes - episode_reward: -459.984 [-459.984, -459.984] - loss: 4733.732 - mae: 1235.564 - mean_q: 1659.128 Interval 3180 (1589500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1595 Interval 3181 (1590000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2900 Interval 3182 (1590500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.9977 1 episodes - episode_reward: -659.631 [-659.631, -659.631] - loss: 4881.569 - mae: 1258.755 - mean_q: 1688.905 Interval 3183 (1591000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9100 1 episodes - episode_reward: -433.049 [-433.049, -433.049] - loss: 5307.466 - mae: 1222.508 - mean_q: 1639.185 Interval 3184 (1591500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9521 2 episodes - episode_reward: -269.918 [-327.865, -211.971] - loss: 3896.011 - mae: 1212.029 - mean_q: 1624.765 Interval 3185 (1592000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9703 Interval 3186 (1592500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0033 1 episodes - episode_reward: -698.021 [-698.021, -698.021] - loss: 3451.591 - mae: 1190.539 
- mean_q: 1594.858 Interval 3187 (1593000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9246 4 episodes - episode_reward: -403.394 [-669.298, -153.680] - loss: 2859.894 - mae: 1176.405 - mean_q: 1572.604 Interval 3188 (1593500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4532 2 episodes - episode_reward: -867.897 [-1417.458, -318.337] - loss: 2777.213 - mae: 1149.509 - mean_q: 1536.906 Interval 3189 (1594000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.4647 3 episodes - episode_reward: -577.021 [-855.891, -342.156] - loss: 3178.660 - mae: 1127.138 - mean_q: 1504.857 Interval 3190 (1594500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3390 3 episodes - episode_reward: -443.078 [-676.217, -268.923] - loss: 3655.001 - mae: 1108.559 - mean_q: 1480.301 Interval 3191 (1595000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8738 1 episodes - episode_reward: -413.600 [-413.600, -413.600] - loss: 3405.403 - mae: 1079.815 - mean_q: 1442.178 Interval 3192 (1595500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4567 2 episodes - episode_reward: -306.063 [-467.906, -144.219] - loss: 4316.124 - mae: 1055.507 - mean_q: 1410.669 Interval 3193 (1596000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2041 1 episodes - episode_reward: -696.531 [-696.531, -696.531] - loss: 2366.230 - mae: 1031.007 - mean_q: 1379.031 Interval 3194 (1596500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.7287 2 episodes - episode_reward: -527.522 [-669.526, -385.519] - loss: 3132.525 - mae: 1018.193 - mean_q: 1361.310 Interval 3195 (1597000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1732 3 episodes - episode_reward: -611.041 [-1117.003, -338.312] - loss: 
2194.934 - mae: 997.922 - mean_q: 1334.149 Interval 3196 (1597500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3004 1 episodes - episode_reward: -231.187 [-231.187, -231.187] - loss: 3396.976 - mae: 979.811 - mean_q: 1309.590 Interval 3197 (1598000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.8500 1 episodes - episode_reward: -484.333 [-484.333, -484.333] - loss: 2085.126 - mae: 966.841 - mean_q: 1294.654 Interval 3198 (1598500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -5.1329 6 episodes - episode_reward: -421.177 [-840.134, -183.456] - loss: 2111.291 - mae: 947.900 - mean_q: 1269.639 Interval 3199 (1599000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.3426 5 episodes - episode_reward: -317.863 [-447.561, -262.018] - loss: 2325.470 - mae: 917.835 - mean_q: 1228.919 Interval 3200 (1599500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0580 1 episodes - episode_reward: -546.930 [-546.930, -546.930] - loss: 2007.846 - mae: 917.754 - mean_q: 1231.032 Interval 3201 (1600000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2132 Interval 3202 (1600500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.6615 5 episodes - episode_reward: -311.310 [-502.345, -128.633] - loss: 2734.107 - mae: 902.292 - mean_q: 1209.198 Interval 3203 (1601000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9749 2 episodes - episode_reward: -447.119 [-501.704, -392.534] - loss: 1895.340 - mae: 887.860 - mean_q: 1190.483 Interval 3204 (1601500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4551 Interval 3205 (1602000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.0944 1 episodes - episode_reward: -1039.362 [-1039.362, 
-1039.362] - loss: 1755.325 - mae: 864.500 - mean_q: 1157.959 Interval 3206 (1602500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1584 5 episodes - episode_reward: -265.720 [-460.577, -137.373] - loss: 2211.010 - mae: 854.151 - mean_q: 1144.071 Interval 3207 (1603000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.6621 4 episodes - episode_reward: -447.302 [-867.290, -169.842] - loss: 1581.983 - mae: 823.349 - mean_q: 1102.806 Interval 3208 (1603500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9873 3 episodes - episode_reward: -310.139 [-389.524, -186.338] - loss: 1803.837 - mae: 808.948 - mean_q: 1082.622 Interval 3209 (1604000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4333 4 episodes - episode_reward: -319.337 [-582.916, -182.701] - loss: 1882.840 - mae: 808.680 - mean_q: 1080.526 Interval 3210 (1604500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9282 1 episodes - episode_reward: -480.438 [-480.438, -480.438] - loss: 1573.725 - mae: 787.001 - mean_q: 1052.117 Interval 3211 (1605000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0493 2 episodes - episode_reward: -439.159 [-563.916, -314.402] - loss: 1811.638 - mae: 772.033 - mean_q: 1030.788 Interval 3212 (1605500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1227 3 episodes - episode_reward: -578.636 [-715.876, -474.813] - loss: 1937.074 - mae: 755.287 - mean_q: 1006.607 Interval 3213 (1606000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.3275 4 episodes - episode_reward: -338.399 [-495.475, -185.019] - loss: 1742.172 - mae: 736.982 - mean_q: 981.356 Interval 3214 (1606500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.2467 3 episodes - episode_reward: -377.885 
[-605.548, -240.581] - loss: 1429.934 - mae: 722.279 - mean_q: 960.964 Interval 3215 (1607000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.6984 4 episodes - episode_reward: -529.447 [-1037.312, -210.248] - loss: 1651.563 - mae: 700.204 - mean_q: 929.580 Interval 3216 (1607500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8751 3 episodes - episode_reward: -296.073 [-355.550, -224.320] - loss: 1469.178 - mae: 690.411 - mean_q: 917.215 Interval 3217 (1608000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6456 2 episodes - episode_reward: -459.115 [-645.190, -273.040] - loss: 1527.281 - mae: 678.219 - mean_q: 899.961 Interval 3218 (1608500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.5600 4 episodes - episode_reward: -440.325 [-685.190, -253.949] - loss: 1883.668 - mae: 658.656 - mean_q: 871.851 Interval 3219 (1609000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5635 2 episodes - episode_reward: -385.588 [-460.987, -310.188] - loss: 1644.714 - mae: 648.226 - mean_q: 856.699 Interval 3220 (1609500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.0412 4 episodes - episode_reward: -513.301 [-817.487, -182.030] - loss: 1867.504 - mae: 623.253 - mean_q: 821.282 Interval 3221 (1610000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.7979 1 episodes - episode_reward: -298.874 [-298.874, -298.874] - loss: 1856.911 - mae: 614.273 - mean_q: 810.540 Interval 3222 (1610500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -7.0315 6 episodes - episode_reward: -826.757 [-3486.354, -100.000] - loss: 2044.644 - mae: 599.185 - mean_q: 790.622 Interval 3223 (1611000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.8802 5 episodes - episode_reward: -395.198 
[-470.844, -331.389] - loss: 1694.909 - mae: 585.822 - mean_q: 771.419 Interval 3224 (1611500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1887 4 episodes - episode_reward: -318.997 [-457.350, -198.830] - loss: 1573.851 - mae: 577.088 - mean_q: 758.019 Interval 3225 (1612000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9197 3 episodes - episode_reward: -457.821 [-610.392, -371.821] - loss: 2363.556 - mae: 572.254 - mean_q: 755.553 Interval 3226 (1612500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.5985 4 episodes - episode_reward: -578.691 [-1054.065, -225.481] - loss: 1873.030 - mae: 549.673 - mean_q: 725.205 Interval 3227 (1613000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.1947 3 episodes - episode_reward: -522.682 [-658.573, -263.213] - loss: 2365.300 - mae: 548.865 - mean_q: 722.038 Interval 3228 (1613500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0905 3 episodes - episode_reward: -265.780 [-334.369, -180.720] - loss: 1847.591 - mae: 531.682 - mean_q: 699.115 Interval 3229 (1614000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.8653 8 episodes - episode_reward: -334.166 [-882.615, -131.946] - loss: 1938.851 - mae: 514.659 - mean_q: 675.863 Interval 3230 (1614500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.4708 5 episodes - episode_reward: -384.833 [-615.902, -165.344] - loss: 1904.426 - mae: 504.068 - mean_q: 658.211 Interval 3231 (1615000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.4786 7 episodes - episode_reward: -292.508 [-577.844, -139.347] - loss: 1573.989 - mae: 484.715 - mean_q: 629.493 Interval 3232 (1615500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -4.6376 4 episodes - episode_reward: -579.940 
[-698.530, -254.922] - loss: 1703.923 - mae: 475.742 - mean_q: 614.012 Interval 3233 (1616000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.9877 5 episodes - episode_reward: -391.476 [-503.010, -260.096] - loss: 1728.072 - mae: 473.208 - mean_q: 605.663 Interval 3234 (1616500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.1994 3 episodes - episode_reward: -406.924 [-742.244, -163.621] - loss: 1576.466 - mae: 466.062 - mean_q: 591.966 Interval 3235 (1617000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3642 3 episodes - episode_reward: -513.894 [-718.681, -171.538] - loss: 1689.519 - mae: 470.218 - mean_q: 595.043 Interval 3236 (1617500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.2950 2 episodes - episode_reward: -881.190 [-957.800, -804.580] - loss: 1950.188 - mae: 474.899 - mean_q: 597.013 Interval 3237 (1618000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.3647 3 episodes - episode_reward: -526.382 [-732.560, -268.821] - loss: 1738.969 - mae: 471.817 - mean_q: 591.740 Interval 3238 (1618500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4248 4 episodes - episode_reward: -336.192 [-649.734, -217.008] - loss: 1890.560 - mae: 483.859 - mean_q: 607.361 Interval 3239 (1619000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0112 2 episodes - episode_reward: -403.555 [-572.484, -234.626] - loss: 2263.917 - mae: 481.572 - mean_q: 600.962 Interval 3240 (1619500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.6852 2 episodes - episode_reward: -603.046 [-688.500, -517.591] - loss: 2048.504 - mae: 496.713 - mean_q: 623.420 Interval 3241 (1620000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -5.1983 4 episodes - episode_reward: -604.927 
[-772.674, -481.056] - loss: 2280.584 - mae: 505.136 - mean_q: 631.308 Interval 3242 (1620500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.6040 2 episodes - episode_reward: -645.649 [-684.837, -606.462] - loss: 2437.973 - mae: 531.814 - mean_q: 667.781 Interval 3243 (1621000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.2823 4 episodes - episode_reward: -528.254 [-855.611, -212.727] - loss: 2459.250 - mae: 549.814 - mean_q: 691.623 Interval 3244 (1621500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.6223 2 episodes - episode_reward: -1100.146 [-1754.954, -445.339] - loss: 2509.879 - mae: 562.999 - mean_q: 707.210 Interval 3245 (1622000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -5.0439 3 episodes - episode_reward: -863.616 [-1383.226, -323.671] - loss: 3479.010 - mae: 595.854 - mean_q: 753.413 Interval 3246 (1622500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.6371 4 episodes - episode_reward: -590.740 [-973.380, -251.707] - loss: 4006.322 - mae: 628.269 - mean_q: 797.312 Interval 3247 (1623000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9372 3 episodes - episode_reward: -297.296 [-532.576, -153.438] - loss: 4333.551 - mae: 639.296 - mean_q: 805.913 Interval 3248 (1623500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1057 3 episodes - episode_reward: -573.560 [-1031.878, -261.167] - loss: 5197.624 - mae: 671.965 - mean_q: 849.910 Interval 3249 (1624000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.0714 3 episodes - episode_reward: -524.488 [-661.019, -428.678] - loss: 6462.389 - mae: 710.497 - mean_q: 899.665 Interval 3250 (1624500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3180 4 episodes - episode_reward: 
-380.702 [-471.074, -265.361] - loss: 6598.817 - mae: 747.237 - mean_q: 950.926 Interval 3251 (1625000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9259 3 episodes - episode_reward: -383.024 [-795.314, -134.712] - loss: 8751.632 - mae: 772.623 - mean_q: 983.310 Interval 3252 (1625500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2485 3 episodes - episode_reward: -474.560 [-702.897, -297.930] - loss: 9394.885 - mae: 803.394 - mean_q: 1025.641 Interval 3253 (1626000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3297 2 episodes - episode_reward: -567.108 [-592.489, -541.727] - loss: 9097.695 - mae: 823.627 - mean_q: 1052.960 Interval 3254 (1626500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8724 1 episodes - episode_reward: -452.404 [-452.404, -452.404] - loss: 10815.520 - mae: 854.062 - mean_q: 1094.235 Interval 3255 (1627000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9538 3 episodes - episode_reward: -334.344 [-499.901, -135.449] - loss: 11062.992 - mae: 884.034 - mean_q: 1132.406 Interval 3256 (1627500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3866 2 episodes - episode_reward: -350.442 [-547.452, -153.433] - loss: 11010.043 - mae: 910.787 - mean_q: 1166.214 Interval 3257 (1628000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1423 3 episodes - episode_reward: -533.696 [-566.321, -487.414] - loss: 11402.067 - mae: 939.248 - mean_q: 1207.108 Interval 3258 (1628500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4660 3 episodes - episode_reward: -391.291 [-726.330, -113.126] - loss: 12406.174 - mae: 1004.001 - mean_q: 1296.193 Interval 3259 (1629000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3136 2 episodes - 
episode_reward: -194.853 [-206.814, -182.892] - loss: 14322.725 - mae: 1030.470 - mean_q: 1330.791 Interval 3260 (1629500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8069 1 episodes - episode_reward: -917.998 [-917.998, -917.998] - loss: 14067.908 - mae: 1038.997 - mean_q: 1337.381 Interval 3261 (1630000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9274 4 episodes - episode_reward: -432.458 [-884.146, -181.095] - loss: 15143.659 - mae: 1061.108 - mean_q: 1370.048 Interval 3262 (1630500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0635 1 episodes - episode_reward: -227.787 [-227.787, -227.787] - loss: 13844.721 - mae: 1081.894 - mean_q: 1397.037 Interval 3263 (1631000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1345 3 episodes - episode_reward: -784.567 [-1084.033, -220.773] - loss: 15759.217 - mae: 1129.284 - mean_q: 1467.378 Interval 3264 (1631500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7791 4 episodes - episode_reward: -331.263 [-563.653, -171.312] - loss: 12486.046 - mae: 1176.297 - mean_q: 1534.583 Interval 3265 (1632000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5114 4 episodes - episode_reward: -291.105 [-365.500, -217.183] - loss: 13459.289 - mae: 1224.942 - mean_q: 1604.443 Interval 3266 (1632500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9257 3 episodes - episode_reward: -397.698 [-781.059, -192.291] - loss: 14828.515 - mae: 1221.027 - mean_q: 1601.747 Interval 3267 (1633000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.6993 4 episodes - episode_reward: -577.692 [-932.775, -120.808] - loss: 17793.111 - mae: 1289.837 - mean_q: 1698.482 Interval 3268 (1633500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -1.6843 2 episodes - episode_reward: -317.890 [-534.574, -101.206] - loss: 15264.269 - mae: 1332.080 - mean_q: 1754.292 Interval 3269 (1634000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1666 3 episodes - episode_reward: -308.446 [-577.231, -160.448] - loss: 18972.020 - mae: 1365.476 - mean_q: 1801.799 Interval 3270 (1634500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.1479 4 episodes - episode_reward: -784.398 [-1273.789, -332.068] - loss: 19767.426 - mae: 1375.994 - mean_q: 1811.369 Interval 3271 (1635000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5401 4 episodes - episode_reward: -373.342 [-620.413, -166.907] - loss: 18989.154 - mae: 1430.948 - mean_q: 1888.720 Interval 3272 (1635500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5810 4 episodes - episode_reward: -352.783 [-571.793, -168.523] - loss: 18153.357 - mae: 1432.969 - mean_q: 1895.649 Interval 3273 (1636000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6468 1 episodes - episode_reward: -243.181 [-243.181, -243.181] - loss: 33840.027 - mae: 1508.321 - mean_q: 2007.445 Interval 3274 (1636500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.6105 2 episodes - episode_reward: -1161.930 [-1403.448, -920.411] - loss: 28719.350 - mae: 1496.887 - mean_q: 1997.700 Interval 3275 (1637000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4965 4 episodes - episode_reward: -194.910 [-437.783, 112.356] - loss: 49457.543 - mae: 1602.246 - mean_q: 2154.350 Interval 3276 (1637500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0865 1 episodes - episode_reward: -37.469 [-37.469, -37.469] - loss: 24130.980 - mae: 1708.599 - mean_q: 2311.220 Interval 3277 (1638000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.0859 Interval 3278 (1638500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0599 7 episodes - episode_reward: -226.095 [-372.415, -128.384] - loss: 44573.801 - mae: 1937.169 - mean_q: 2639.650 Interval 3279 (1639000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0903 Interval 3280 (1639500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4588 1 episodes - episode_reward: -137.759 [-137.759, -137.759] - loss: 30819.207 - mae: 2189.255 - mean_q: 2982.254 Interval 3281 (1640000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0949 Interval 3282 (1640500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3218 1 episodes - episode_reward: -591.736 [-591.736, -591.736] - loss: 32365.615 - mae: 2450.572 - mean_q: 3317.277 Interval 3283 (1641000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3602 2 episodes - episode_reward: -215.499 [-382.680, -48.318] - loss: 25850.287 - mae: 2525.010 - mean_q: 3409.897 Interval 3284 (1641500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0854 1 episodes - episode_reward: -237.946 [-237.946, -237.946] - loss: 40753.133 - mae: 2580.942 - mean_q: 3475.892 Interval 3285 (1642000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5004 2 episodes - episode_reward: -273.233 [-416.679, -129.787] - loss: 36349.008 - mae: 2597.850 - mean_q: 3495.979 Interval 3286 (1642500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2216 3 episodes - episode_reward: -198.377 [-277.937, -100.000] - loss: 36158.781 - mae: 2585.461 - mean_q: 3473.561 Interval 3287 (1643000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5877 Interval 3288 
(1643500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6539 2 episodes - episode_reward: -506.225 [-623.762, -388.688] - loss: 36814.961 - mae: 2631.736 - mean_q: 3536.341 Interval 3289 (1644000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8857 1 episodes - episode_reward: -189.407 [-189.407, -189.407] - loss: 48252.770 - mae: 2656.251 - mean_q: 3567.719 Interval 3290 (1644500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2537 2 episodes - episode_reward: -300.035 [-454.000, -146.069] - loss: 30210.684 - mae: 2667.121 - mean_q: 3584.947 Interval 3291 (1645000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1288 3 episodes - episode_reward: -467.640 [-1002.961, -100.000] - loss: 31379.654 - mae: 2646.650 - mean_q: 3552.498 Interval 3292 (1645500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6936 6 episodes - episode_reward: -235.281 [-642.909, -111.939] - loss: 45158.676 - mae: 2617.024 - mean_q: 3511.230 Interval 3293 (1646000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8750 Interval 3294 (1646500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7771 3 episodes - episode_reward: -424.640 [-767.925, -110.938] - loss: 35157.719 - mae: 2696.229 - mean_q: 3626.258 Interval 3295 (1647000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4177 1 episodes - episode_reward: -175.618 [-175.618, -175.618] - loss: 29019.266 - mae: 2719.647 - mean_q: 3659.081 Interval 3296 (1647500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3318 2 episodes - episode_reward: -843.722 [-1317.512, -369.931] - loss: 39820.492 - mae: 2716.001 - mean_q: 3656.187 Interval 3297 (1648000 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -2.3577 4 episodes - episode_reward: -303.542 [-680.038, -76.517] - loss: 24935.174 - mae: 2722.455 - mean_q: 3667.452 Interval 3298 (1648500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0910 1 episodes - episode_reward: -132.801 [-132.801, -132.801] - loss: 30813.566 - mae: 2751.912 - mean_q: 3710.162 Interval 3299 (1649000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8570 2 episodes - episode_reward: -370.392 [-628.322, -112.462] - loss: 42591.309 - mae: 2788.955 - mean_q: 3763.564 Interval 3300 (1649500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8438 1 episodes - episode_reward: -674.919 [-674.919, -674.919] - loss: 30172.381 - mae: 2844.338 - mean_q: 3843.276 Interval 3301 (1650000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6021 1 episodes - episode_reward: -709.829 [-709.829, -709.829] - loss: 27972.293 - mae: 2888.746 - mean_q: 3905.871 Interval 3302 (1650500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7945 2 episodes - episode_reward: -682.188 [-827.686, -536.690] - loss: 36560.102 - mae: 2932.265 - mean_q: 3964.075 Interval 3303 (1651000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2194 Interval 3304 (1651500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8559 1 episodes - episode_reward: -492.459 [-492.459, -492.459] - loss: 33074.172 - mae: 3086.288 - mean_q: 4174.128 Interval 3305 (1652000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8575 1 episodes - episode_reward: -998.827 [-998.827, -998.827] - loss: 35442.188 - mae: 3090.061 - mean_q: 4169.113 Interval 3306 (1652500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1959 Interval 3307 (1653000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -0.1020 Interval 3308 (1653500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0980 Interval 3309 (1654000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1259 Interval 3310 (1654500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3240 Interval 3311 (1655000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1777 Interval 3312 (1655500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.3848 Interval 3313 (1656000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.6915 Interval 3314 (1656500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6593 1 episodes - episode_reward: -1240.597 [-1240.597, -1240.597] - loss: 29019.873 - mae: 3384.181 - mean_q: 4578.694 Interval 3315 (1657000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8399 Interval 3316 (1657500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.7894 Interval 3317 (1658000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9943 2 episodes - episode_reward: -560.730 [-1019.717, -101.743] - loss: 43280.617 - mae: 3364.908 - mean_q: 4548.954 Interval 3318 (1658500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6213 Interval 3319 (1659000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3265 2 episodes - episode_reward: -495.176 [-685.095, -305.258] - loss: 35607.285 - mae: 3301.194 - mean_q: 4451.672 Interval 3320 (1659500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6933 Interval 3321 (1660000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2736 1 
episodes - episode_reward: -1074.904 [-1074.904, -1074.904] - loss: 24670.453 - mae: 3205.841 - mean_q: 4315.625 Interval 3322 (1660500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.7520 1 episodes - episode_reward: -1054.867 [-1054.867, -1054.867] - loss: 22684.076 - mae: 3147.553 - mean_q: 4236.364 Interval 3323 (1661000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7858 Interval 3324 (1661500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7439 1 episodes - episode_reward: -752.630 [-752.630, -752.630] - loss: 24418.783 - mae: 3058.085 - mean_q: 4114.964 Interval 3325 (1662000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1961 Interval 3326 (1662500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.8004 Interval 3327 (1663000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -1.6626 1 episodes - episode_reward: -1255.041 [-1255.041, -1255.041] - loss: 25186.568 - mae: 2950.648 - mean_q: 3964.806 Interval 3328 (1663500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8038 2 episodes - episode_reward: -206.536 [-214.512, -198.560] - loss: 23834.605 - mae: 2882.590 - mean_q: 3876.251 Interval 3329 (1664000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9873 1 episodes - episode_reward: -1485.283 [-1485.283, -1485.283] - loss: 19402.789 - mae: 2850.208 - mean_q: 3830.409 Interval 3330 (1664500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9124 1 episodes - episode_reward: -862.451 [-862.451, -862.451] - loss: 20156.881 - mae: 2829.544 - mean_q: 3800.037 Interval 3331 (1665000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1349 Interval 3332 (1665500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -4.8599 2 episodes - episode_reward: -1576.318 [-3003.769, -148.868] - loss: 19516.662 - mae: 2734.783 - mean_q: 3667.868 Interval 3333 (1666000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9596 2 episodes - episode_reward: -496.828 [-860.328, -133.328] - loss: 17112.062 - mae: 2701.535 - mean_q: 3619.563 Interval 3334 (1666500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.6706 2 episodes - episode_reward: -406.183 [-584.379, -227.988] - loss: 18792.498 - mae: 2636.654 - mean_q: 3531.767 Interval 3335 (1667000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5926 2 episodes - episode_reward: -600.405 [-991.107, -209.703] - loss: 18085.535 - mae: 2556.824 - mean_q: 3424.336 Interval 3336 (1667500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5229 1 episodes - episode_reward: -629.730 [-629.730, -629.730] - loss: 16179.178 - mae: 2541.953 - mean_q: 3406.837 Interval 3337 (1668000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2171 Interval 3338 (1668500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1768 5 episodes - episode_reward: -246.606 [-479.713, -100.000] - loss: 16772.986 - mae: 2458.355 - mean_q: 3296.111 Interval 3339 (1669000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9575 3 episodes - episode_reward: -356.182 [-695.321, -152.099] - loss: 14837.398 - mae: 2400.251 - mean_q: 3221.527 Interval 3340 (1669500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6179 1 episodes - episode_reward: -604.334 [-604.334, -604.334] - loss: 13999.043 - mae: 2381.843 - mean_q: 3197.640 Interval 3341 (1670000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4736 2 episodes - 
episode_reward: -232.946 [-423.400, -42.492] - loss: 14495.958 - mae: 2356.621 - mean_q: 3165.836 Interval 3342 (1670500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9720 1 episodes - episode_reward: -978.011 [-978.011, -978.011] - loss: 14771.897 - mae: 2358.909 - mean_q: 3169.106 Interval 3343 (1671000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1260 1 episodes - episode_reward: -662.325 [-662.325, -662.325] - loss: 12970.168 - mae: 2332.263 - mean_q: 3136.867 Interval 3344 (1671500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9883 1 episodes - episode_reward: -190.084 [-190.084, -190.084] - loss: 14086.755 - mae: 2366.542 - mean_q: 3179.037 Interval 3345 (1672000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3579 3 episodes - episode_reward: -509.396 [-714.387, -129.097] - loss: 13050.482 - mae: 2348.294 - mean_q: 3156.102 Interval 3346 (1672500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5976 1 episodes - episode_reward: -187.585 [-187.585, -187.585] - loss: 12522.107 - mae: 2367.821 - mean_q: 3180.640 Interval 3347 (1673000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4338 3 episodes - episode_reward: -274.519 [-350.515, -232.868] - loss: 14158.399 - mae: 2368.954 - mean_q: 3181.783 Interval 3348 (1673500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4545 4 episodes - episode_reward: -180.991 [-345.038, -59.689] - loss: 14641.252 - mae: 2413.617 - mean_q: 3249.226 Interval 3349 (1674000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3461 7 episodes - episode_reward: -167.192 [-369.425, -100.724] - loss: 14904.354 - mae: 2467.828 - mean_q: 3336.250 Interval 3350 (1674500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -1.0110 3 episodes - episode_reward: -159.983 [-222.142, -114.059] - loss: 18008.721 - mae: 2605.046 - mean_q: 3543.081 Interval 3351 (1675000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7103 3 episodes - episode_reward: -118.325 [-172.459, -43.902] - loss: 21038.574 - mae: 2824.225 - mean_q: 3852.548 Interval 3352 (1675500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7052 2 episodes - episode_reward: -191.928 [-248.477, -135.380] - loss: 24092.176 - mae: 3038.500 - mean_q: 4135.768 Interval 3353 (1676000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2326 4 episodes - episode_reward: -145.679 [-296.125, 11.577] - loss: 27808.422 - mae: 3214.581 - mean_q: 4364.380 Interval 3354 (1676500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4287 1 episodes - episode_reward: -366.066 [-366.066, -366.066] - loss: 27285.289 - mae: 3305.666 - mean_q: 4471.774 Interval 3355 (1677000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5056 2 episodes - episode_reward: -580.744 [-1050.798, -110.689] - loss: 23674.535 - mae: 3326.264 - mean_q: 4486.724 Interval 3356 (1677500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1287 3 episodes - episode_reward: -166.457 [-289.926, -69.822] - loss: 24774.965 - mae: 3361.104 - mean_q: 4530.079 Interval 3357 (1678000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5728 4 episodes - episode_reward: -203.987 [-346.333, -66.075] - loss: 25813.777 - mae: 3308.820 - mean_q: 4456.673 Interval 3358 (1678500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7094 2 episodes - episode_reward: -162.993 [-167.750, -158.236] - loss: 25939.160 - mae: 3319.978 - mean_q: 4463.201 Interval 3359 (1679000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -1.0645 3 episodes - episode_reward: -149.950 [-209.666, -115.439] - loss: 23858.166 - mae: 3290.082 - mean_q: 4417.204 Interval 3360 (1679500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1410 5 episodes - episode_reward: -200.756 [-335.601, -100.000] - loss: 23451.725 - mae: 3237.211 - mean_q: 4343.977 Interval 3361 (1680000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0822 2 episodes - episode_reward: -384.880 [-497.007, -272.753] - loss: 20197.754 - mae: 3164.474 - mean_q: 4243.826 Interval 3362 (1680500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2180 Interval 3363 (1681000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5672 1 episodes - episode_reward: -265.165 [-265.165, -265.165] - loss: 20763.504 - mae: 3040.768 - mean_q: 4087.240 Interval 3364 (1681500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6572 2 episodes - episode_reward: -116.083 [-130.784, -101.383] - loss: 20124.725 - mae: 3017.707 - mean_q: 4053.556 Interval 3365 (1682000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3934 1 episodes - episode_reward: -1310.146 [-1310.146, -1310.146] - loss: 15462.928 - mae: 2977.675 - mean_q: 4004.327 Interval 3366 (1682500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8618 1 episodes - episode_reward: -542.011 [-542.011, -542.011] - loss: 16725.785 - mae: 2996.653 - mean_q: 4031.363 Interval 3367 (1683000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5091 2 episodes - episode_reward: -659.551 [-1204.545, -114.557] - loss: 18909.385 - mae: 2962.738 - mean_q: 3988.774 Interval 3368 (1683500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1527 3 episodes - 
episode_reward: -321.372 [-536.549, -157.168] - loss: 19893.920 - mae: 2938.277 - mean_q: 3956.777 Interval 3369 (1684000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.3703 7 episodes - episode_reward: -310.051 [-534.509, -100.000] - loss: 17234.316 - mae: 2928.021 - mean_q: 3943.602 Interval 3370 (1684500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9027 3 episodes - episode_reward: -269.798 [-590.834, -107.375] - loss: 16247.314 - mae: 2936.842 - mean_q: 3950.025 Interval 3371 (1685000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8840 3 episodes - episode_reward: -240.514 [-363.874, -145.172] - loss: 17924.840 - mae: 2917.686 - mean_q: 3918.911 Interval 3372 (1685500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4498 2 episodes - episode_reward: -438.484 [-563.195, -313.774] - loss: 17148.639 - mae: 2884.424 - mean_q: 3870.984 Interval 3373 (1686000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.8647 5 episodes - episode_reward: -415.852 [-1204.339, -106.094] - loss: 17178.000 - mae: 2847.158 - mean_q: 3815.075 Interval 3374 (1686500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4632 5 episodes - episode_reward: -251.478 [-493.326, -55.920] - loss: 14264.859 - mae: 2756.822 - mean_q: 3690.697 Interval 3375 (1687000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7607 2 episodes - episode_reward: -638.612 [-829.283, -447.940] - loss: 14495.369 - mae: 2747.258 - mean_q: 3675.281 Interval 3376 (1687500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3073 5 episodes - episode_reward: -233.075 [-526.175, -116.002] - loss: 16908.680 - mae: 2705.120 - mean_q: 3619.482 Interval 3377 (1688000 steps performed) 500/500 [==============================] - 3s 7ms/step - 
reward: -2.2034 4 episodes - episode_reward: -274.487 [-538.236, -74.611] - loss: 17917.279 - mae: 2664.760 - mean_q: 3566.171 Interval 3378 (1688500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.5063 5 episodes - episode_reward: -260.572 [-565.808, -34.476] - loss: 19637.006 - mae: 2684.237 - mean_q: 3597.385 Interval 3379 (1689000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.7129 6 episodes - episode_reward: -197.273 [-343.522, -134.684] - loss: 17030.764 - mae: 2659.544 - mean_q: 3569.042 Interval 3380 (1689500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4251 5 episodes - episode_reward: -280.671 [-408.136, -155.806] - loss: 18014.281 - mae: 2620.484 - mean_q: 3519.091 Interval 3381 (1690000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.5023 7 episodes - episode_reward: -255.340 [-486.652, -79.847] - loss: 18636.373 - mae: 2663.004 - mean_q: 3580.861 Interval 3382 (1690500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0094 6 episodes - episode_reward: -256.694 [-421.779, -98.373] - loss: 18703.537 - mae: 2650.644 - mean_q: 3566.042 Interval 3383 (1691000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4919 6 episodes - episode_reward: -204.387 [-721.021, -53.517] - loss: 21922.586 - mae: 2731.119 - mean_q: 3676.136 Interval 3384 (1691500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.8067 5 episodes - episode_reward: -285.935 [-781.455, -109.064] - loss: 21300.348 - mae: 2769.666 - mean_q: 3732.880 Interval 3385 (1692000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4339 1 episodes - episode_reward: -152.600 [-152.600, -152.600] - loss: 23485.602 - mae: 2823.073 - mean_q: 3807.861 Interval 3386 (1692500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -1.5090 3 episodes - episode_reward: -272.554 [-304.619, -210.165] - loss: 24938.266 - mae: 2902.363 - mean_q: 3914.832 Interval 3387 (1693000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1897 2 episodes - episode_reward: -389.648 [-545.562, -233.735] - loss: 26322.961 - mae: 2998.872 - mean_q: 4045.227 Interval 3388 (1693500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.6226 4 episodes - episode_reward: -501.452 [-705.914, -225.604] - loss: 28418.793 - mae: 3044.861 - mean_q: 4104.792 Interval 3389 (1694000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9741 4 episodes - episode_reward: -370.143 [-490.256, -155.041] - loss: 36958.469 - mae: 3089.430 - mean_q: 4166.868 Interval 3390 (1694500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.7869 4 episodes - episode_reward: -357.662 [-599.416, -200.016] - loss: 29788.881 - mae: 3092.867 - mean_q: 4168.563 Interval 3391 (1695000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1141 3 episodes - episode_reward: -540.758 [-680.456, -409.081] - loss: 29402.980 - mae: 3117.373 - mean_q: 4202.998 Interval 3392 (1695500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -5.0535 5 episodes - episode_reward: -493.366 [-776.665, -195.208] - loss: 34060.859 - mae: 3159.695 - mean_q: 4257.480 Interval 3393 (1696000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3061 2 episodes - episode_reward: -287.326 [-287.638, -287.013] - loss: 29976.547 - mae: 3091.331 - mean_q: 4169.281 Interval 3394 (1696500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1643 2 episodes - episode_reward: -565.195 [-663.980, -466.410] - loss: 26357.957 - mae: 3065.958 - mean_q: 4140.876 Interval 3395 (1697000 
steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4774 5 episodes - episode_reward: -431.456 [-778.341, -301.919] - loss: 30785.746 - mae: 3098.176 - mean_q: 4182.924 Interval 3396 (1697500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.6669 5 episodes - episode_reward: -295.301 [-520.187, -100.000] - loss: 29662.006 - mae: 3040.948 - mean_q: 4109.786 Interval 3397 (1698000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3719 1 episodes - episode_reward: -859.869 [-859.869, -859.869] - loss: 25338.545 - mae: 3108.479 - mean_q: 4208.855 Interval 3398 (1698500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5654 4 episodes - episode_reward: -409.110 [-624.941, -171.922] - loss: 29964.287 - mae: 3101.144 - mean_q: 4195.607 Interval 3399 (1699000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.2641 2 episodes - episode_reward: -838.730 [-1225.678, -451.782] - loss: 33515.723 - mae: 3103.961 - mean_q: 4200.403 Interval 3400 (1699500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5561 3 episodes - episode_reward: -202.248 [-269.715, -97.784] - loss: 32648.404 - mae: 3095.005 - mean_q: 4195.957 Interval 3401 (1700000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.3625 5 episodes - episode_reward: -365.653 [-967.716, -126.158] - loss: 34548.258 - mae: 3065.944 - mean_q: 4160.622 Interval 3402 (1700500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3498 2 episodes - episode_reward: -204.505 [-301.188, -107.822] - loss: 33528.125 - mae: 3129.330 - mean_q: 4250.633 Interval 3403 (1701000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4012 3 episodes - episode_reward: -468.849 [-605.818, -329.721] - loss: 46726.211 - mae: 3166.710 - mean_q: 
4301.542 Interval 3404 (1701500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1083 Interval 3405 (1702000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3474 2 episodes - episode_reward: -604.175 [-817.162, -391.188] - loss: 42484.625 - mae: 3292.757 - mean_q: 4477.218 Interval 3406 (1702500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.7909 3 episodes - episode_reward: -801.341 [-1942.762, -105.746] - loss: 40721.969 - mae: 3326.334 - mean_q: 4527.546 Interval 3407 (1703000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3249 2 episodes - episode_reward: -207.981 [-267.130, -148.833] - loss: 41936.621 - mae: 3400.973 - mean_q: 4631.751 Interval 3408 (1703500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.2932 Interval 3409 (1704000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -10.2407 1 episodes - episode_reward: -7875.792 [-7875.792, -7875.792] - loss: 52470.270 - mae: 3528.000 - mean_q: 4796.912 Interval 3410 (1704500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.5016 Interval 3411 (1705000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.5641 2 episodes - episode_reward: -1816.343 [-2900.560, -732.126] - loss: 41274.453 - mae: 3678.461 - mean_q: 5006.152 Interval 3412 (1705500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.1914 5 episodes - episode_reward: -435.605 [-1282.212, -26.834] - loss: 56573.891 - mae: 3714.712 - mean_q: 5047.987 Interval 3413 (1706000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1040 Interval 3414 (1706500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.9854 2 episodes - episode_reward: -1287.538 [-2005.117, -569.958] - loss: 
63385.969 - mae: 3809.087 - mean_q: 5186.387 Interval 3415 (1707000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5232 1 episodes - episode_reward: -709.882 [-709.882, -709.882] - loss: 49504.883 - mae: 3850.675 - mean_q: 5247.107 Interval 3416 (1707500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.2615 Interval 3417 (1708000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.5357 1 episodes - episode_reward: -3272.062 [-3272.062, -3272.062] - loss: 44129.965 - mae: 3995.833 - mean_q: 5443.863 Interval 3418 (1708500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.6477 1 episodes - episode_reward: -3278.549 [-3278.549, -3278.549] - loss: 50913.133 - mae: 3990.458 - mean_q: 5433.439 Interval 3419 (1709000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0214 2 episodes - episode_reward: -433.379 [-756.772, -109.985] - loss: 57423.434 - mae: 4051.606 - mean_q: 5510.963 Interval 3420 (1709500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6650 4 episodes - episode_reward: -418.579 [-1106.609, -100.957] - loss: 52491.094 - mae: 3986.476 - mean_q: 5421.504 Interval 3421 (1710000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8732 1 episodes - episode_reward: -1484.847 [-1484.847, -1484.847] - loss: 54986.555 - mae: 3997.476 - mean_q: 5436.285 Interval 3422 (1710500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0770 1 episodes - episode_reward: -514.295 [-514.295, -514.295] - loss: 48647.973 - mae: 3971.302 - mean_q: 5396.383 Interval 3423 (1711000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8809 2 episodes - episode_reward: -138.750 [-155.356, -122.145] - loss: 45314.555 - mae: 3916.718 - mean_q: 5318.875 Interval 3424 (1711500 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2359 2 episodes - episode_reward: -587.328 [-912.630, -262.026] - loss: 42236.711 - mae: 3916.060 - mean_q: 5316.603 Interval 3425 (1712000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0368 Interval 3426 (1712500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9926 4 episodes - episode_reward: -283.222 [-696.496, -92.594] - loss: 42080.070 - mae: 3826.090 - mean_q: 5179.598 Interval 3427 (1713000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5702 1 episodes - episode_reward: -591.572 [-591.572, -591.572] - loss: 43500.039 - mae: 3753.113 - mean_q: 5085.474 Interval 3428 (1713500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5807 1 episodes - episode_reward: -952.534 [-952.534, -952.534] - loss: 46272.664 - mae: 3732.336 - mean_q: 5055.042 Interval 3429 (1714000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2611 Interval 3430 (1714500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3482 Interval 3431 (1715000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.8745 Interval 3432 (1715500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5079 2 episodes - episode_reward: -1002.995 [-1555.994, -449.995] - loss: 45268.840 - mae: 3584.141 - mean_q: 4832.685 Interval 3433 (1716000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4726 Interval 3434 (1716500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0270 3 episodes - episode_reward: -534.578 [-871.487, -212.677] - loss: 33774.469 - mae: 3517.708 - mean_q: 4734.418 Interval 3435 (1717000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2481 
2 episodes - episode_reward: -626.259 [-1039.464, -213.054] - loss: 32757.789 - mae: 3375.253 - mean_q: 4542.949 Interval 3436 (1717500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6736 2 episodes - episode_reward: -382.757 [-548.861, -216.653] - loss: 41969.938 - mae: 3312.689 - mean_q: 4454.479 Interval 3437 (1718000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.7646 2 episodes - episode_reward: -1126.986 [-2045.722, -208.249] - loss: 35872.129 - mae: 3238.236 - mean_q: 4359.790 Interval 3438 (1718500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0103 2 episodes - episode_reward: -293.749 [-444.689, -142.809] - loss: 42737.586 - mae: 3156.082 - mean_q: 4251.810 Interval 3439 (1719000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.4096 2 episodes - episode_reward: -1180.986 [-2020.604, -341.367] - loss: 34960.203 - mae: 3145.083 - mean_q: 4235.930 Interval 3440 (1719500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7483 1 episodes - episode_reward: -1357.585 [-1357.585, -1357.585] - loss: 41971.160 - mae: 3088.145 - mean_q: 4156.768 Interval 3441 (1720000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9497 2 episodes - episode_reward: -379.379 [-517.592, -241.167] - loss: 35573.527 - mae: 3047.754 - mean_q: 4108.625 Interval 3442 (1720500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1565 2 episodes - episode_reward: -316.753 [-462.509, -170.996] - loss: 31018.260 - mae: 3043.713 - mean_q: 4103.664 Interval 3443 (1721000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8826 1 episodes - episode_reward: -519.784 [-519.784, -519.784] - loss: 30506.768 - mae: 3039.912 - mean_q: 4099.529 Interval 3444 (1721500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -1.2924 2 episodes - episode_reward: -309.323 [-440.795, -177.851] - loss: 28676.977 - mae: 3011.901 - mean_q: 4063.310 Interval 3445 (1722000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5131 1 episodes - episode_reward: -289.393 [-289.393, -289.393] - loss: 39976.980 - mae: 3036.360 - mean_q: 4099.046 Interval 3446 (1722500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3184 Interval 3447 (1723000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6246 3 episodes - episode_reward: -333.526 [-464.658, -129.864] - loss: 29311.641 - mae: 3080.353 - mean_q: 4163.009 Interval 3448 (1723500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3271 Interval 3449 (1724000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0298 Interval 3450 (1724500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.7068 3 episodes - episode_reward: -633.969 [-1204.598, -327.508] - loss: 37793.496 - mae: 3187.892 - mean_q: 4313.678 Interval 3451 (1725000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6215 2 episodes - episode_reward: -476.325 [-524.138, -428.512] - loss: 32549.311 - mae: 3203.600 - mean_q: 4328.770 Interval 3452 (1725500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0801 Interval 3453 (1726000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1065 Interval 3454 (1726500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1554 Interval 3455 (1727000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.7027 2 episodes - episode_reward: -205.648 [-255.062, -156.234] - loss: 48115.430 - mae: 3248.620 - mean_q: 4381.972 Interval 3456 
(1727500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2453 2 episodes - episode_reward: -303.060 [-521.024, -85.096] - loss: 53731.203 - mae: 3234.497 - mean_q: 4372.432 Interval 3457 (1728000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7508 1 episodes - episode_reward: -416.759 [-416.759, -416.759] - loss: 41611.613 - mae: 3265.832 - mean_q: 4421.188 Interval 3458 (1728500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2397 Interval 3459 (1729000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6232 1 episodes - episode_reward: -356.449 [-356.449, -356.449] - loss: 40782.312 - mae: 3302.849 - mean_q: 4481.240 Interval 3460 (1729500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3931 Interval 3461 (1730000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2624 1 episodes - episode_reward: -445.155 [-445.155, -445.155] - loss: 36951.836 - mae: 3449.073 - mean_q: 4688.994 Interval 3462 (1730500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8836 2 episodes - episode_reward: -1412.441 [-2535.134, -289.749] - loss: 42163.648 - mae: 3551.039 - mean_q: 4829.069 Interval 3463 (1731000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2994 2 episodes - episode_reward: -320.045 [-337.128, -302.962] - loss: 40418.828 - mae: 3645.410 - mean_q: 4959.701 Interval 3464 (1731500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1997 2 episodes - episode_reward: -288.580 [-318.082, -259.079] - loss: 46135.645 - mae: 3699.033 - mean_q: 5029.925 Interval 3465 (1732000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6568 4 episodes - episode_reward: -221.854 [-399.155, -114.820] - loss: 46086.594 - mae: 3766.816 - 
mean_q: 5119.958 Interval 3466 (1732500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5547 2 episodes - episode_reward: -359.821 [-360.459, -359.183] - loss: 44247.027 - mae: 3812.139 - mean_q: 5180.866 Interval 3467 (1733000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0939 1 episodes - episode_reward: -243.354 [-243.354, -243.354] - loss: 44374.336 - mae: 3939.135 - mean_q: 5346.492 Interval 3468 (1733500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7169 1 episodes - episode_reward: -984.943 [-984.943, -984.943] - loss: 47645.844 - mae: 4014.740 - mean_q: 5448.583 Interval 3469 (1734000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4246 Interval 3470 (1734500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5769 Interval 3471 (1735000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2145 4 episodes - episode_reward: -718.729 [-1495.891, -117.051] - loss: 46225.270 - mae: 4198.062 - mean_q: 5684.308 Interval 3472 (1735500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3260 1 episodes - episode_reward: -1033.237 [-1033.237, -1033.237] - loss: 46789.387 - mae: 4198.972 - mean_q: 5681.548 Interval 3473 (1736000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7357 2 episodes - episode_reward: -688.623 [-1051.501, -325.745] - loss: 47004.402 - mae: 4253.312 - mean_q: 5750.428 Interval 3474 (1736500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4756 Interval 3475 (1737000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.4629 2 episodes - episode_reward: -973.367 [-1673.299, -273.435] - loss: 55855.941 - mae: 4268.489 - mean_q: 5759.518 Interval 3476 (1737500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -1.2639 3 episodes - episode_reward: -290.119 [-413.797, -220.584] - loss: 58069.039 - mae: 4313.275 - mean_q: 5815.432 Interval 3477 (1738000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0111 Interval 3478 (1738500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1518 Interval 3479 (1739000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1530 Interval 3480 (1739500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1619 Interval 3481 (1740000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0598 Interval 3482 (1740500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.0083 1 episodes - episode_reward: -641.121 [-641.121, -641.121] - loss: 69831.719 - mae: 4404.783 - mean_q: 5935.622 Interval 3483 (1741000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3594 2 episodes - episode_reward: -401.866 [-556.865, -246.867] - loss: 78364.523 - mae: 4467.234 - mean_q: 6018.382 Interval 3484 (1741500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8469 2 episodes - episode_reward: -435.969 [-811.981, -59.956] - loss: 64162.430 - mae: 4433.194 - mean_q: 5974.027 Interval 3485 (1742000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.6502 4 episodes - episode_reward: -542.011 [-934.920, -115.303] - loss: 82723.711 - mae: 4483.410 - mean_q: 6037.190 Interval 3486 (1742500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2999 2 episodes - episode_reward: -631.143 [-968.093, -294.193] - loss: 64714.578 - mae: 4403.160 - mean_q: 5923.922 Interval 3487 (1743000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6175 4 episodes - 
episode_reward: -168.786 [-327.323, -100.000] - loss: 73721.578 - mae: 4478.188 - mean_q: 6029.086 Interval 3488 (1743500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3680 4 episodes - episode_reward: -447.801 [-1055.925, -105.228] - loss: 62038.781 - mae: 4456.315 - mean_q: 6004.444 Interval 3489 (1744000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8924 5 episodes - episode_reward: -173.116 [-271.218, -83.309] - loss: 53377.414 - mae: 4532.034 - mean_q: 6106.633 Interval 3490 (1744500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8114 1 episodes - episode_reward: -1314.770 [-1314.770, -1314.770] - loss: 66495.844 - mae: 4583.815 - mean_q: 6175.897 Interval 3491 (1745000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3175 4 episodes - episode_reward: -324.393 [-635.971, -94.321] - loss: 91748.273 - mae: 4592.007 - mean_q: 6189.208 Interval 3492 (1745500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0035 5 episodes - episode_reward: -317.796 [-388.210, -126.166] - loss: 72697.227 - mae: 4664.390 - mean_q: 6288.026 Interval 3493 (1746000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9633 4 episodes - episode_reward: -248.191 [-446.771, -139.856] - loss: 64312.977 - mae: 4662.766 - mean_q: 6284.089 Interval 3494 (1746500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7771 3 episodes - episode_reward: -277.444 [-449.628, -116.803] - loss: 71291.430 - mae: 4693.211 - mean_q: 6324.804 Interval 3495 (1747000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2417 5 episodes - episode_reward: -210.237 [-538.695, -30.397] - loss: 46992.434 - mae: 4764.278 - mean_q: 6415.368 Interval 3496 (1747500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -1.2952 2 episodes - episode_reward: -368.064 [-473.802, -262.327] - loss: 68530.758 - mae: 4753.371 - mean_q: 6394.392 Interval 3497 (1748000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6614 1 episodes - episode_reward: -665.253 [-665.253, -665.253] - loss: 57405.164 - mae: 4756.805 - mean_q: 6394.637 Interval 3498 (1748500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8572 4 episodes - episode_reward: -364.426 [-517.218, -241.901] - loss: 53804.078 - mae: 4691.644 - mean_q: 6315.074 Interval 3499 (1749000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8697 1 episodes - episode_reward: -542.318 [-542.318, -542.318] - loss: 63915.555 - mae: 4739.099 - mean_q: 6379.636 Interval 3500 (1749500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0228 1 episodes - episode_reward: -392.082 [-392.082, -392.082] - loss: 49823.457 - mae: 4780.983 - mean_q: 6448.178 Interval 3501 (1750000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4792 1 episodes - episode_reward: -285.414 [-285.414, -285.414] - loss: 68511.664 - mae: 4868.897 - mean_q: 6556.354 Interval 3502 (1750500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5892 1 episodes - episode_reward: -334.287 [-334.287, -334.287] - loss: 62577.809 - mae: 4789.626 - mean_q: 6456.314 Interval 3503 (1751000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0966 2 episodes - episode_reward: -230.071 [-317.098, -143.044] - loss: 50318.863 - mae: 4877.890 - mean_q: 6588.767 Interval 3504 (1751500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3700 1 episodes - episode_reward: -345.017 [-345.017, -345.017] - loss: 56877.035 - mae: 4831.987 - mean_q: 6526.544 Interval 3505 (1752000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -0.0990 Interval 3506 (1752500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2523 Interval 3507 (1753000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8218 3 episodes - episode_reward: -205.853 [-294.706, -113.648] - loss: 60766.586 - mae: 4880.671 - mean_q: 6604.517 Interval 3508 (1753500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0619 2 episodes - episode_reward: -255.288 [-394.012, -116.565] - loss: 67291.969 - mae: 4855.990 - mean_q: 6577.185 Interval 3509 (1754000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4048 5 episodes - episode_reward: -415.864 [-733.680, -222.165] - loss: 66273.984 - mae: 4830.676 - mean_q: 6551.062 Interval 3510 (1754500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9194 2 episodes - episode_reward: -538.271 [-684.352, -392.189] - loss: 64377.660 - mae: 4764.183 - mean_q: 6458.391 Interval 3511 (1755000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9268 2 episodes - episode_reward: -631.405 [-975.120, -287.690] - loss: 59617.742 - mae: 4768.195 - mean_q: 6467.509 Interval 3512 (1755500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.6357 5 episodes - episode_reward: -376.041 [-623.114, -179.327] - loss: 50719.195 - mae: 4735.088 - mean_q: 6431.243 Interval 3513 (1756000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7085 2 episodes - episode_reward: -575.950 [-721.581, -430.318] - loss: 55657.051 - mae: 4743.962 - mean_q: 6440.729 Interval 3514 (1756500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9019 3 episodes - episode_reward: -485.098 [-690.995, -310.744] - loss: 62216.023 - mae: 4643.980 - mean_q: 6295.475 Interval 3515 
(1757000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9417 5 episodes - episode_reward: -264.771 [-562.189, -100.000] - loss: 57148.652 - mae: 4619.628 - mean_q: 6264.918 Interval 3516 (1757500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0526 5 episodes - episode_reward: -503.226 [-883.276, -100.000] - loss: 48384.520 - mae: 4497.176 - mean_q: 6095.241 Interval 3517 (1758000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8663 1 episodes - episode_reward: -768.048 [-768.048, -768.048] - loss: 55291.727 - mae: 4384.233 - mean_q: 5934.508 Interval 3518 (1758500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.6368 2 episodes - episode_reward: -867.709 [-1214.391, -521.027] - loss: 42402.945 - mae: 4282.500 - mean_q: 5798.364 Interval 3519 (1759000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7696 3 episodes - episode_reward: -375.430 [-533.286, -263.304] - loss: 52063.496 - mae: 4294.222 - mean_q: 5814.142 Interval 3520 (1759500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3349 2 episodes - episode_reward: -169.742 [-173.239, -166.246] - loss: 40197.062 - mae: 4199.743 - mean_q: 5696.778 Interval 3521 (1760000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2272 4 episodes - episode_reward: -349.662 [-467.495, -100.000] - loss: 54187.840 - mae: 4200.880 - mean_q: 5708.000 Interval 3522 (1760500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2209 1 episodes - episode_reward: -1032.608 [-1032.608, -1032.608] - loss: 47064.137 - mae: 4266.993 - mean_q: 5819.262 Interval 3523 (1761000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7950 Interval 3524 (1761500 steps performed) 500/500 [==============================] - 3s 7ms/step - 
reward: -1.5404 1 episodes - episode_reward: -1719.769 [-1719.769, -1719.769] - loss: 50583.734 - mae: 4372.331 - mean_q: 5973.857 Interval 3525 (1762000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3023 1 episodes - episode_reward: -685.433 [-685.433, -685.433] - loss: 58501.574 - mae: 4473.538 - mean_q: 6107.238 Interval 3526 (1762500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1399 1 episodes - episode_reward: -968.971 [-968.971, -968.971] - loss: 38035.527 - mae: 4529.395 - mean_q: 6172.261 Interval 3527 (1763000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9983 1 episodes - episode_reward: -930.132 [-930.132, -930.132] - loss: 56766.980 - mae: 4569.088 - mean_q: 6213.714 Interval 3528 (1763500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9887 1 episodes - episode_reward: -932.636 [-932.636, -932.636] - loss: 46980.266 - mae: 4536.595 - mean_q: 6154.169 Interval 3529 (1764000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1382 1 episodes - episode_reward: -1155.379 [-1155.379, -1155.379] - loss: 59538.852 - mae: 4441.317 - mean_q: 6008.970 Interval 3530 (1764500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9077 4 episodes - episode_reward: -464.104 [-873.017, -188.618] - loss: 42069.875 - mae: 4357.184 - mean_q: 5891.319 Interval 3531 (1765000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8836 1 episodes - episode_reward: -1331.683 [-1331.683, -1331.683] - loss: 45037.691 - mae: 4281.111 - mean_q: 5790.493 Interval 3532 (1765500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4089 3 episodes - episode_reward: -401.594 [-544.371, -243.527] - loss: 43434.391 - mae: 4298.867 - mean_q: 5809.913 Interval 3533 (1766000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -3.2600 3 episodes - episode_reward: -664.271 [-740.234, -515.259] - loss: 33835.770 - mae: 4213.341 - mean_q: 5689.014 Interval 3534 (1766500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2701 2 episodes - episode_reward: -350.041 [-425.943, -274.139] - loss: 41648.906 - mae: 4182.827 - mean_q: 5643.837 Interval 3535 (1767000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3218 3 episodes - episode_reward: -642.020 [-1166.926, -210.398] - loss: 51839.926 - mae: 4087.093 - mean_q: 5514.454 Interval 3536 (1767500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8098 1 episodes - episode_reward: -914.995 [-914.995, -914.995] - loss: 39333.277 - mae: 4133.152 - mean_q: 5591.587 Interval 3537 (1768000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5855 1 episodes - episode_reward: -1652.690 [-1652.690, -1652.690] - loss: 35112.570 - mae: 4113.816 - mean_q: 5574.270 Interval 3538 (1768500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9054 2 episodes - episode_reward: -771.673 [-1209.251, -334.095] - loss: 37286.754 - mae: 4175.754 - mean_q: 5665.153 Interval 3539 (1769000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6671 3 episodes - episode_reward: -585.772 [-1135.733, -241.792] - loss: 36094.848 - mae: 4196.608 - mean_q: 5693.265 Interval 3540 (1769500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5565 2 episodes - episode_reward: -549.349 [-750.515, -348.183] - loss: 36583.125 - mae: 4263.429 - mean_q: 5784.663 Interval 3541 (1770000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5928 1 episodes - episode_reward: -872.454 [-872.454, -872.454] - loss: 41566.988 - mae: 4336.745 - mean_q: 5883.507 Interval 3542 
(1770500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5546 1 episodes - episode_reward: -1016.763 [-1016.763, -1016.763] - loss: 42246.379 - mae: 4354.644 - mean_q: 5902.508 Interval 3543 (1771000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2520 3 episodes - episode_reward: -417.897 [-614.646, -178.071] - loss: 48274.965 - mae: 4316.193 - mean_q: 5842.847 Interval 3544 (1771500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5925 4 episodes - episode_reward: -341.146 [-793.540, -181.999] - loss: 36273.234 - mae: 4262.046 - mean_q: 5766.312 Interval 3545 (1772000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4840 1 episodes - episode_reward: -375.562 [-375.562, -375.562] - loss: 40900.219 - mae: 4211.027 - mean_q: 5692.205 Interval 3546 (1772500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2174 4 episodes - episode_reward: -358.618 [-656.423, -100.000] - loss: 41487.180 - mae: 4171.824 - mean_q: 5630.156 Interval 3547 (1773000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0123 Interval 3548 (1773500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4252 2 episodes - episode_reward: -788.747 [-1316.146, -261.347] - loss: 37505.199 - mae: 4068.090 - mean_q: 5480.396 Interval 3549 (1774000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3239 1 episodes - episode_reward: -812.644 [-812.644, -812.644] - loss: 40302.977 - mae: 3999.279 - mean_q: 5387.334 Interval 3550 (1774500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0509 1 episodes - episode_reward: -331.562 [-331.562, -331.562] - loss: 42128.094 - mae: 3969.319 - mean_q: 5348.112 Interval 3551 (1775000 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -2.9590 1 episodes - episode_reward: -1339.090 [-1339.090, -1339.090] - loss: 37235.375 - mae: 3964.216 - mean_q: 5341.146 Interval 3552 (1775500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9532 3 episodes - episode_reward: -445.079 [-1081.275, -123.500] - loss: 39172.625 - mae: 3950.417 - mean_q: 5319.757 Interval 3553 (1776000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5303 2 episodes - episode_reward: -324.843 [-427.356, -222.330] - loss: 41741.188 - mae: 3953.141 - mean_q: 5325.200 Interval 3554 (1776500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3879 1 episodes - episode_reward: -250.750 [-250.750, -250.750] - loss: 48048.480 - mae: 3918.775 - mean_q: 5274.373 Interval 3555 (1777000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3817 3 episodes - episode_reward: -586.664 [-1223.530, -264.919] - loss: 40393.797 - mae: 3890.337 - mean_q: 5231.550 Interval 3556 (1777500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2073 5 episodes - episode_reward: -220.726 [-455.498, -69.443] - loss: 40987.656 - mae: 3877.071 - mean_q: 5209.417 Interval 3557 (1778000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9238 4 episodes - episode_reward: -175.141 [-307.839, -37.169] - loss: 35738.176 - mae: 3823.256 - mean_q: 5143.249 Interval 3558 (1778500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7666 3 episodes - episode_reward: -388.724 [-579.406, -214.680] - loss: 44345.121 - mae: 3769.655 - mean_q: 5057.483 Interval 3559 (1779000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.0587 5 episodes - episode_reward: -574.786 [-1162.068, -114.258] - loss: 36081.066 - mae: 3710.875 - mean_q: 4981.070 Interval 3560 (1779500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -1.1510 3 episodes - episode_reward: -203.666 [-262.585, -157.989] - loss: 42176.129 - mae: 3588.100 - mean_q: 4813.854 Interval 3561 (1780000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3004 2 episodes - episode_reward: -588.424 [-1067.033, -109.816] - loss: 44649.797 - mae: 3492.988 - mean_q: 4684.505 Interval 3562 (1780500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8650 1 episodes - episode_reward: -198.478 [-198.478, -198.478] - loss: 32132.504 - mae: 3463.360 - mean_q: 4651.430 Interval 3563 (1781000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5825 4 episodes - episode_reward: -529.735 [-733.583, -431.155] - loss: 39820.062 - mae: 3426.533 - mean_q: 4599.518 Interval 3564 (1781500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.4875 6 episodes - episode_reward: -456.340 [-676.649, -100.000] - loss: 35177.203 - mae: 3395.310 - mean_q: 4564.016 Interval 3565 (1782000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1708 1 episodes - episode_reward: -553.191 [-553.191, -553.191] - loss: 30963.934 - mae: 3346.403 - mean_q: 4504.561 Interval 3566 (1782500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0121 Interval 3567 (1783000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4176 1 episodes - episode_reward: -217.785 [-217.785, -217.785] - loss: 26898.467 - mae: 3313.121 - mean_q: 4465.017 Interval 3568 (1783500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6768 1 episodes - episode_reward: -253.857 [-253.857, -253.857] - loss: 24696.168 - mae: 3280.258 - mean_q: 4423.084 Interval 3569 (1784000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1575 3 episodes - 
episode_reward: -195.685 [-335.512, -100.000] - loss: 26874.266 - mae: 3193.777 - mean_q: 4306.574 Interval 3570 (1784500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6867 2 episodes - episode_reward: -279.951 [-338.952, -220.950] - loss: 30488.660 - mae: 3167.049 - mean_q: 4271.842 Interval 3571 (1785000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7801 2 episodes - episode_reward: -84.172 [-391.702, 223.357] - loss: 28379.029 - mae: 3185.132 - mean_q: 4296.743 Interval 3572 (1785500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8618 3 episodes - episode_reward: -273.715 [-471.388, -100.000] - loss: 31715.682 - mae: 3163.823 - mean_q: 4267.831 Interval 3573 (1786000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5013 6 episodes - episode_reward: -242.734 [-337.245, -133.163] - loss: 30713.080 - mae: 3122.208 - mean_q: 4204.363 Interval 3574 (1786500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8158 2 episodes - episode_reward: -391.941 [-407.955, -375.927] - loss: 27671.779 - mae: 3074.820 - mean_q: 4140.155 Interval 3575 (1787000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3572 4 episodes - episode_reward: -183.628 [-293.775, -105.028] - loss: 25627.398 - mae: 3093.467 - mean_q: 4163.768 Interval 3576 (1787500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3419 2 episodes - episode_reward: -497.387 [-583.595, -411.180] - loss: 24156.869 - mae: 3111.251 - mean_q: 4190.725 Interval 3577 (1788000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.6791 4 episodes - episode_reward: -586.918 [-796.736, -357.586] - loss: 28047.502 - mae: 3125.089 - mean_q: 4215.059 Interval 3578 (1788500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -3.8986 4 episodes - episode_reward: -539.083 [-909.141, -215.970] - loss: 27812.869 - mae: 3082.282 - mean_q: 4153.215 Interval 3579 (1789000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3977 3 episodes - episode_reward: -228.830 [-287.379, -169.125] - loss: 26755.924 - mae: 3051.284 - mean_q: 4114.408 Interval 3580 (1789500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1603 2 episodes - episode_reward: -288.959 [-477.917, -100.000] - loss: 21063.740 - mae: 3018.022 - mean_q: 4075.326 Interval 3581 (1790000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4291 3 episodes - episode_reward: -220.417 [-251.670, -170.226] - loss: 21490.191 - mae: 3056.825 - mean_q: 4132.152 Interval 3582 (1790500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5624 2 episodes - episode_reward: -631.565 [-830.074, -433.055] - loss: 26870.115 - mae: 3095.038 - mean_q: 4176.876 Interval 3583 (1791000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6869 2 episodes - episode_reward: -424.401 [-582.291, -266.511] - loss: 24676.713 - mae: 3106.177 - mean_q: 4193.302 Interval 3584 (1791500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2519 3 episodes - episode_reward: -215.821 [-307.121, -155.174] - loss: 23639.881 - mae: 3077.887 - mean_q: 4156.374 Interval 3585 (1792000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0687 1 episodes - episode_reward: -487.298 [-487.298, -487.298] - loss: 23931.475 - mae: 3140.352 - mean_q: 4240.904 Interval 3586 (1792500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7432 1 episodes - episode_reward: -398.349 [-398.349, -398.349] - loss: 24847.852 - mae: 3186.054 - mean_q: 4298.365 Interval 3587 (1793000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -1.7209 3 episodes - episode_reward: -278.050 [-483.220, -91.723] - loss: 24843.182 - mae: 3155.386 - mean_q: 4252.712 Interval 3588 (1793500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0183 1 episodes - episode_reward: -588.085 [-588.085, -588.085] - loss: 23469.795 - mae: 3146.516 - mean_q: 4242.290 Interval 3589 (1794000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1791 1 episodes - episode_reward: -2009.163 [-2009.163, -2009.163] - loss: 20529.906 - mae: 3198.693 - mean_q: 4310.144 Interval 3590 (1794500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.4605 4 episodes - episode_reward: -440.210 [-589.722, -107.568] - loss: 25992.648 - mae: 3164.046 - mean_q: 4260.930 Interval 3591 (1795000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6769 2 episodes - episode_reward: -257.629 [-400.950, -114.307] - loss: 23179.275 - mae: 3136.290 - mean_q: 4225.742 Interval 3592 (1795500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3084 3 episodes - episode_reward: -822.860 [-1256.379, -552.068] - loss: 20575.215 - mae: 3171.584 - mean_q: 4274.568 Interval 3593 (1796000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5603 3 episodes - episode_reward: -420.915 [-544.415, -302.280] - loss: 25457.131 - mae: 3133.230 - mean_q: 4224.550 Interval 3594 (1796500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8729 Interval 3595 (1797000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6948 2 episodes - episode_reward: -673.158 [-685.133, -661.183] - loss: 21263.801 - mae: 3156.957 - mean_q: 4264.833 Interval 3596 (1797500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9094 2 episodes - 
episode_reward: -641.677 [-722.728, -560.627] - loss: 24913.572 - mae: 3092.125 - mean_q: 4175.282 Interval 3597 (1798000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2626 2 episodes - episode_reward: -282.845 [-462.173, -103.517] - loss: 22520.924 - mae: 3161.842 - mean_q: 4271.077 Interval 3598 (1798500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4903 1 episodes - episode_reward: -926.199 [-926.199, -926.199] - loss: 19976.172 - mae: 3198.047 - mean_q: 4330.041 Interval 3599 (1799000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3566 1 episodes - episode_reward: -525.547 [-525.547, -525.547] - loss: 21375.432 - mae: 3272.053 - mean_q: 4430.355 Interval 3600 (1799500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5126 2 episodes - episode_reward: -340.037 [-362.933, -317.142] - loss: 24462.635 - mae: 3279.474 - mean_q: 4444.175 Interval 3601 (1800000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6991 1 episodes - episode_reward: -644.150 [-644.150, -644.150] - loss: 24644.904 - mae: 3316.379 - mean_q: 4506.027 Interval 3602 (1800500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9289 3 episodes - episode_reward: -333.014 [-709.708, -141.425] - loss: 23148.945 - mae: 3428.898 - mean_q: 4669.225 Interval 3603 (1801000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9766 4 episodes - episode_reward: -368.911 [-645.518, -182.139] - loss: 22781.822 - mae: 3559.430 - mean_q: 4844.935 Interval 3604 (1801500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4349 Interval 3605 (1802000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1061 Interval 3606 (1802500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -1.5028 1 episodes - episode_reward: -2097.693 [-2097.693, -2097.693] - loss: 27822.855 - mae: 3843.938 - mean_q: 5207.695 Interval 3607 (1803000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4236 Interval 3608 (1803500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1484 1 episodes - episode_reward: -1561.582 [-1561.582, -1561.582] - loss: 26354.986 - mae: 3816.073 - mean_q: 5157.759 Interval 3609 (1804000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1929 2 episodes - episode_reward: -1109.896 [-1986.017, -233.775] - loss: 21732.945 - mae: 3845.365 - mean_q: 5195.426 Interval 3610 (1804500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3596 1 episodes - episode_reward: -666.884 [-666.884, -666.884] - loss: 30755.240 - mae: 3775.510 - mean_q: 5095.567 Interval 3611 (1805000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2604 1 episodes - episode_reward: -734.324 [-734.324, -734.324] - loss: 27892.312 - mae: 3692.421 - mean_q: 4981.497 Interval 3612 (1805500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8848 1 episodes - episode_reward: -561.697 [-561.697, -561.697] - loss: 26861.225 - mae: 3654.865 - mean_q: 4928.411 Interval 3613 (1806000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8476 1 episodes - episode_reward: -1691.071 [-1691.071, -1691.071] - loss: 23022.680 - mae: 3600.424 - mean_q: 4853.138 Interval 3614 (1806500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7641 2 episodes - episode_reward: -477.173 [-682.798, -271.548] - loss: 23634.629 - mae: 3540.686 - mean_q: 4773.192 Interval 3615 (1807000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8865 1 episodes - episode_reward: -546.383 [-546.383, -546.383] 
- loss: 21532.977 - mae: 3527.731 - mean_q: 4757.694 Interval 3616 (1807500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4859 2 episodes - episode_reward: -816.533 [-1316.362, -316.703] - loss: 22399.740 - mae: 3456.531 - mean_q: 4653.252 Interval 3617 (1808000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9346 1 episodes - episode_reward: -1460.174 [-1460.174, -1460.174] - loss: 24545.111 - mae: 3454.515 - mean_q: 4644.601 Interval 3618 (1808500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2515 1 episodes - episode_reward: -290.552 [-290.552, -290.552] - loss: 25444.943 - mae: 3377.409 - mean_q: 4540.462 Interval 3619 (1809000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7321 1 episodes - episode_reward: -926.472 [-926.472, -926.472] - loss: 25141.896 - mae: 3337.578 - mean_q: 4482.369 Interval 3620 (1809500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3722 2 episodes - episode_reward: -585.187 [-819.884, -350.489] - loss: 22547.918 - mae: 3296.233 - mean_q: 4432.728 Interval 3621 (1810000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9960 1 episodes - episode_reward: -623.140 [-623.140, -623.140] - loss: 21887.246 - mae: 3255.362 - mean_q: 4379.401 Interval 3622 (1810500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1206 2 episodes - episode_reward: -617.112 [-946.995, -287.229] - loss: 22154.463 - mae: 3194.488 - mean_q: 4302.833 Interval 3623 (1811000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5256 1 episodes - episode_reward: -695.882 [-695.882, -695.882] - loss: 21176.506 - mae: 3145.932 - mean_q: 4241.988 Interval 3624 (1811500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2781 1 episodes - episode_reward: 
-330.267 [-330.267, -330.267] - loss: 18584.867 - mae: 3130.241 - mean_q: 4221.374 Interval 3625 (1812000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.4001 4 episodes - episode_reward: -337.646 [-655.693, -184.174] - loss: 22457.471 - mae: 3084.821 - mean_q: 4161.417 Interval 3626 (1812500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9258 3 episodes - episode_reward: -547.867 [-969.361, -306.740] - loss: 19715.227 - mae: 3058.531 - mean_q: 4130.698 Interval 3627 (1813000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4130 2 episodes - episode_reward: -376.033 [-567.395, -184.671] - loss: 23318.809 - mae: 3061.894 - mean_q: 4133.849 Interval 3628 (1813500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7904 2 episodes - episode_reward: -371.529 [-484.282, -258.776] - loss: 20635.195 - mae: 3060.791 - mean_q: 4133.869 Interval 3629 (1814000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3327 3 episodes - episode_reward: -393.692 [-884.338, -117.519] - loss: 21311.008 - mae: 3086.039 - mean_q: 4166.516 Interval 3630 (1814500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1135 Interval 3631 (1815000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0620 Interval 3632 (1815500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2789 Interval 3633 (1816000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1170 Interval 3634 (1816500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1635 Interval 3635 (1817000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.2436 Interval 3636 (1817500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: 
-0.1084 Interval 3637 (1818000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.3363 Interval 3638 (1818500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -1.3042 1 episodes - episode_reward: -1316.094 [-1316.094, -1316.094] - loss: 33689.691 - mae: 3292.736 - mean_q: 4423.580 Interval 3639 (1819000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0123 3 episodes - episode_reward: -368.101 [-553.537, -223.609] - loss: 40008.695 - mae: 3239.221 - mean_q: 4349.580 Interval 3640 (1819500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8794 1 episodes - episode_reward: -532.054 [-532.054, -532.054] - loss: 29851.098 - mae: 3176.388 - mean_q: 4258.722 Interval 3641 (1820000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6574 2 episodes - episode_reward: -518.033 [-864.229, -171.838] - loss: 18375.305 - mae: 3078.774 - mean_q: 4125.730 Interval 3642 (1820500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.8977 3 episodes - episode_reward: -671.498 [-1203.508, -209.008] - loss: 19631.006 - mae: 2972.680 - mean_q: 3979.299 Interval 3643 (1821000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4754 3 episodes - episode_reward: -477.005 [-579.298, -351.394] - loss: 20576.078 - mae: 2917.837 - mean_q: 3906.664 Interval 3644 (1821500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3225 1 episodes - episode_reward: -1015.378 [-1015.378, -1015.378] - loss: 17763.941 - mae: 2767.035 - mean_q: 3702.762 Interval 3645 (1822000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0777 1 episodes - episode_reward: -654.423 [-654.423, -654.423] - loss: 16976.703 - mae: 2698.039 - mean_q: 3609.634 Interval 3646 (1822500 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -2.4848 2 episodes - episode_reward: -551.217 [-771.444, -330.990] - loss: 16814.074 - mae: 2596.915 - mean_q: 3474.323 Interval 3647 (1823000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8395 Interval 3648 (1823500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -1.1542 1 episodes - episode_reward: -1061.485 [-1061.485, -1061.485] - loss: 15943.642 - mae: 2434.072 - mean_q: 3254.908 Interval 3649 (1824000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1633 1 episodes - episode_reward: -245.410 [-245.410, -245.410] - loss: 14263.701 - mae: 2389.408 - mean_q: 3198.546 Interval 3650 (1824500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9539 Interval 3651 (1825000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3048 2 episodes - episode_reward: -779.686 [-1002.151, -557.222] - loss: 15780.318 - mae: 2305.051 - mean_q: 3088.144 Interval 3652 (1825500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.9857 1 episodes - episode_reward: -1403.512 [-1403.512, -1403.512] - loss: 17764.904 - mae: 2270.616 - mean_q: 3042.582 Interval 3653 (1826000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2968 4 episodes - episode_reward: -163.853 [-272.275, -91.523] - loss: 15264.585 - mae: 2269.186 - mean_q: 3043.988 Interval 3654 (1826500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1934 1 episodes - episode_reward: -658.438 [-658.438, -658.438] - loss: 15754.103 - mae: 2253.545 - mean_q: 3026.955 Interval 3655 (1827000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.2045 2 episodes - episode_reward: -693.283 [-825.399, -561.166] - loss: 16920.035 - mae: 2253.811 - mean_q: 3031.482 Interval 3656 
(1827500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2399 Interval 3657 (1828000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5271 3 episodes - episode_reward: -530.161 [-999.611, -145.334] - loss: 17708.207 - mae: 2284.833 - mean_q: 3078.251 Interval 3658 (1828500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5090 1 episodes - episode_reward: -452.466 [-452.466, -452.466] - loss: 24696.338 - mae: 2313.395 - mean_q: 3118.860 Interval 3659 (1829000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1936 1 episodes - episode_reward: -1318.559 [-1318.559, -1318.559] - loss: 21235.857 - mae: 2341.423 - mean_q: 3157.917 Interval 3660 (1829500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8901 Interval 3661 (1830000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.9200 1 episodes - episode_reward: -1306.096 [-1306.096, -1306.096] - loss: 25300.943 - mae: 2437.816 - mean_q: 3289.262 Interval 3662 (1830500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3442 Interval 3663 (1831000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0671 Interval 3664 (1831500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.6807 5 episodes - episode_reward: -430.584 [-1264.387, -81.834] - loss: 31891.916 - mae: 2473.501 - mean_q: 3319.197 Interval 3665 (1832000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4049 6 episodes - episode_reward: -196.254 [-348.576, -86.973] - loss: 29981.961 - mae: 2466.679 - mean_q: 3308.518 Interval 3666 (1832500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3354 6 episodes - episode_reward: -210.526 [-404.423, -92.925] - loss: 41874.961 - mae: 2471.835 - 
mean_q: 3314.416 Interval 3667 (1833000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5798 2 episodes - episode_reward: -403.254 [-620.651, -185.858] - loss: 60693.977 - mae: 2455.653 - mean_q: 3287.458 Interval 3668 (1833500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6889 4 episodes - episode_reward: -207.795 [-350.546, -114.569] - loss: 23817.236 - mae: 2446.379 - mean_q: 3275.413 Interval 3669 (1834000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9645 3 episodes - episode_reward: -310.351 [-528.729, -131.470] - loss: 47758.602 - mae: 2434.594 - mean_q: 3258.824 Interval 3670 (1834500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7660 5 episodes - episode_reward: -175.577 [-230.348, -110.864] - loss: 27710.535 - mae: 2416.712 - mean_q: 3239.942 Interval 3671 (1835000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7773 3 episodes - episode_reward: -311.364 [-418.510, -200.311] - loss: 31137.148 - mae: 2409.697 - mean_q: 3228.195 Interval 3672 (1835500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8502 4 episodes - episode_reward: -234.218 [-401.418, -100.000] - loss: 43227.027 - mae: 2382.161 - mean_q: 3190.494 Interval 3673 (1836000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2520 3 episodes - episode_reward: -199.245 [-300.158, -115.551] - loss: 28143.838 - mae: 2343.740 - mean_q: 3142.742 Interval 3674 (1836500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8031 1 episodes - episode_reward: -298.296 [-298.296, -298.296] - loss: 27464.506 - mae: 2340.499 - mean_q: 3143.095 Interval 3675 (1837000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1704 5 episodes - episode_reward: -239.535 [-547.289, -24.675] - loss: 
20355.926 - mae: 2338.699 - mean_q: 3143.856 Interval 3676 (1837500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9510 4 episodes - episode_reward: -124.541 [-161.690, -101.325] - loss: 25753.432 - mae: 2329.341 - mean_q: 3132.076 Interval 3677 (1838000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5277 4 episodes - episode_reward: -295.781 [-330.191, -229.879] - loss: 21556.361 - mae: 2295.726 - mean_q: 3087.273 Interval 3678 (1838500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8296 2 episodes - episode_reward: -241.519 [-274.581, -208.457] - loss: 21541.250 - mae: 2292.632 - mean_q: 3079.781 Interval 3679 (1839000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6059 4 episodes - episode_reward: -140.468 [-203.251, -100.000] - loss: 19191.119 - mae: 2249.622 - mean_q: 3023.082 Interval 3680 (1839500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6236 3 episodes - episode_reward: -323.904 [-817.571, -54.141] - loss: 18736.812 - mae: 2223.641 - mean_q: 2983.672 Interval 3681 (1840000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0357 2 episodes - episode_reward: -551.679 [-609.754, -493.603] - loss: 18216.404 - mae: 2174.739 - mean_q: 2920.238 Interval 3682 (1840500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4195 4 episodes - episode_reward: -176.412 [-219.807, -133.579] - loss: 16853.828 - mae: 2105.013 - mean_q: 2826.044 Interval 3683 (1841000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4903 5 episodes - episode_reward: -230.552 [-617.917, 48.181] - loss: 15881.869 - mae: 2064.407 - mean_q: 2774.131 Interval 3684 (1841500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3044 3 episodes - episode_reward: -393.266 
[-866.045, -149.661] - loss: 14296.773 - mae: 2011.791 - mean_q: 2701.620 Interval 3685 (1842000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4649 5 episodes - episode_reward: -356.758 [-702.583, -111.965] - loss: 18616.658 - mae: 1930.318 - mean_q: 2590.420 Interval 3686 (1842500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8369 2 episodes - episode_reward: -478.091 [-574.826, -381.357] - loss: 14004.356 - mae: 1894.189 - mean_q: 2543.207 Interval 3687 (1843000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9776 6 episodes - episode_reward: -223.212 [-496.176, -100.000] - loss: 16599.578 - mae: 1844.289 - mean_q: 2470.971 Interval 3688 (1843500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.3087 6 episodes - episode_reward: -297.005 [-711.068, -108.176] - loss: 12172.399 - mae: 1795.708 - mean_q: 2410.173 Interval 3689 (1844000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1104 3 episodes - episode_reward: -318.064 [-398.946, -187.804] - loss: 13959.315 - mae: 1737.065 - mean_q: 2329.299 Interval 3690 (1844500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2999 2 episodes - episode_reward: -508.093 [-691.848, -324.338] - loss: 12851.372 - mae: 1724.489 - mean_q: 2315.773 Interval 3691 (1845000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.6033 4 episodes - episode_reward: -337.666 [-457.719, -244.580] - loss: 13498.277 - mae: 1696.369 - mean_q: 2277.846 Interval 3692 (1845500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7965 5 episodes - episode_reward: -197.513 [-348.322, -64.684] - loss: 12973.726 - mae: 1673.505 - mean_q: 2245.386 Interval 3693 (1846000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3421 3 episodes - 
episode_reward: -467.559 [-655.053, -161.161] - loss: 11882.967 - mae: 1644.562 - mean_q: 2205.575 Interval 3694 (1846500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4684 4 episodes - episode_reward: -280.774 [-452.296, -72.968] - loss: 11997.375 - mae: 1626.595 - mean_q: 2183.117 Interval 3695 (1847000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8318 3 episodes - episode_reward: -385.798 [-458.584, -304.053] - loss: 10413.375 - mae: 1599.058 - mean_q: 2146.900 Interval 3696 (1847500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6017 3 episodes - episode_reward: -353.628 [-477.639, -269.455] - loss: 10693.050 - mae: 1605.614 - mean_q: 2153.994 Interval 3697 (1848000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2449 3 episodes - episode_reward: -289.343 [-610.050, -63.992] - loss: 10666.485 - mae: 1586.306 - mean_q: 2130.482 Interval 3698 (1848500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9623 3 episodes - episode_reward: -403.551 [-799.746, -26.476] - loss: 10841.753 - mae: 1580.491 - mean_q: 2123.400 Interval 3699 (1849000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5978 2 episodes - episode_reward: -240.463 [-366.268, -114.658] - loss: 11573.645 - mae: 1587.449 - mean_q: 2135.227 Interval 3700 (1849500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6164 4 episodes - episode_reward: -285.252 [-457.002, -199.265] - loss: 11643.252 - mae: 1581.633 - mean_q: 2128.532 Interval 3701 (1850000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6180 3 episodes - episode_reward: -103.388 [-251.638, 21.724] - loss: 10904.541 - mae: 1599.016 - mean_q: 2153.878 Interval 3702 (1850500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
-1.5300 2 episodes - episode_reward: -315.766 [-343.620, -287.912] - loss: 11943.384 - mae: 1610.449 - mean_q: 2172.444 Interval 3703 (1851000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0654 2 episodes - episode_reward: -517.927 [-873.034, -162.819] - loss: 10661.031 - mae: 1620.733 - mean_q: 2190.067 Interval 3704 (1851500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9614 1 episodes - episode_reward: -778.331 [-778.331, -778.331] - loss: 11443.306 - mae: 1660.408 - mean_q: 2243.351 Interval 3705 (1852000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3015 3 episodes - episode_reward: -661.033 [-960.402, -100.000] - loss: 11931.666 - mae: 1664.068 - mean_q: 2247.890 Interval 3706 (1852500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6796 2 episodes - episode_reward: -53.611 [-262.703, 155.482] - loss: 9779.296 - mae: 1689.502 - mean_q: 2281.784 Interval 3707 (1853000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6800 3 episodes - episode_reward: -350.094 [-435.483, -265.762] - loss: 11253.367 - mae: 1685.718 - mean_q: 2275.145 Interval 3708 (1853500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9643 6 episodes - episode_reward: -247.305 [-419.936, -99.527] - loss: 9440.610 - mae: 1690.428 - mean_q: 2281.013 Interval 3709 (1854000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7472 3 episodes - episode_reward: -146.712 [-190.702, -109.185] - loss: 10219.420 - mae: 1679.703 - mean_q: 2261.905 Interval 3710 (1854500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9940 1 episodes - episode_reward: -934.058 [-934.058, -934.058] - loss: 10807.618 - mae: 1683.017 - mean_q: 2264.296 Interval 3711 (1855000 steps performed) 500/500 [==============================] - 
3s 6ms/step - reward: -2.5816 3 episodes - episode_reward: -501.659 [-1189.032, -107.088] - loss: 11780.417 - mae: 1669.586 - mean_q: 2244.005 Interval 3712 (1855500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5965 2 episodes - episode_reward: -798.552 [-1294.587, -302.517] - loss: 10680.541 - mae: 1662.537 - mean_q: 2234.395 Interval 3713 (1856000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1230 Interval 3714 (1856500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9952 2 episodes - episode_reward: -518.141 [-841.562, -194.721] - loss: 11781.486 - mae: 1622.905 - mean_q: 2180.351 Interval 3715 (1857000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1878 2 episodes - episode_reward: -190.687 [-269.001, -112.373] - loss: 10748.317 - mae: 1585.949 - mean_q: 2129.251 Interval 3716 (1857500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7373 2 episodes - episode_reward: -454.621 [-470.254, -438.988] - loss: 10196.620 - mae: 1556.463 - mean_q: 2090.851 Interval 3717 (1858000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6275 2 episodes - episode_reward: -585.389 [-853.515, -317.263] - loss: 9525.548 - mae: 1532.966 - mean_q: 2059.014 Interval 3718 (1858500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6281 3 episodes - episode_reward: -271.754 [-615.263, -100.000] - loss: 9611.269 - mae: 1511.685 - mean_q: 2032.944 Interval 3719 (1859000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6735 1 episodes - episode_reward: -963.104 [-963.104, -963.104] - loss: 10771.467 - mae: 1508.473 - mean_q: 2028.624 Interval 3720 (1859500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8511 1 episodes - episode_reward: -1133.816 [-1133.816, 
-1133.816] - loss: 9385.563 - mae: 1507.046 - mean_q: 2025.582 Interval 3721 (1860000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6462 2 episodes - episode_reward: -454.849 [-567.024, -342.673] - loss: 10106.140 - mae: 1485.153 - mean_q: 1998.852 Interval 3722 (1860500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5459 1 episodes - episode_reward: -1084.430 [-1084.430, -1084.430] - loss: 11153.220 - mae: 1491.939 - mean_q: 2009.197 Interval 3723 (1861000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1916 2 episodes - episode_reward: -522.045 [-925.443, -118.647] - loss: 10590.114 - mae: 1489.532 - mean_q: 2004.568 Interval 3724 (1861500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5646 3 episodes - episode_reward: -479.764 [-586.543, -347.811] - loss: 11594.860 - mae: 1487.151 - mean_q: 1998.808 Interval 3725 (1862000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.6650 2 episodes - episode_reward: -848.211 [-1020.146, -676.276] - loss: 9020.065 - mae: 1448.748 - mean_q: 1948.019 Interval 3726 (1862500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2478 1 episodes - episode_reward: -660.443 [-660.443, -660.443] - loss: 9515.210 - mae: 1414.103 - mean_q: 1899.232 Interval 3727 (1863000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6959 2 episodes - episode_reward: -683.815 [-984.818, -382.813] - loss: 7718.424 - mae: 1382.502 - mean_q: 1854.530 Interval 3728 (1863500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9798 2 episodes - episode_reward: -728.047 [-1324.407, -131.687] - loss: 9075.520 - mae: 1395.864 - mean_q: 1874.567 Interval 3729 (1864000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4046 1 episodes - 
episode_reward: -511.398 [-511.398, -511.398] - loss: 8920.733 - mae: 1336.510 - mean_q: 1792.585 Interval 3730 (1864500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5781 1 episodes - episode_reward: -519.173 [-519.173, -519.173] - loss: 6766.750 - mae: 1322.101 - mean_q: 1774.473 Interval 3731 (1865000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4799 Interval 3732 (1865500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1410 2 episodes - episode_reward: -834.551 [-1117.146, -551.955] - loss: 8681.764 - mae: 1249.245 - mean_q: 1675.370 Interval 3733 (1866000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4151 1 episodes - episode_reward: -680.856 [-680.856, -680.856] - loss: 7156.426 - mae: 1230.042 - mean_q: 1652.989 Interval 3734 (1866500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1394 1 episodes - episode_reward: -445.122 [-445.122, -445.122] - loss: 7239.516 - mae: 1215.736 - mean_q: 1631.949 Interval 3735 (1867000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6585 2 episodes - episode_reward: -408.972 [-518.675, -299.270] - loss: 7476.135 - mae: 1168.900 - mean_q: 1570.371 Interval 3736 (1867500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8279 Interval 3737 (1868000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9645 2 episodes - episode_reward: -578.861 [-940.534, -217.188] - loss: 6369.564 - mae: 1118.373 - mean_q: 1501.563 Interval 3738 (1868500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -6.7436 5 episodes - episode_reward: -678.047 [-1268.247, -319.061] - loss: 5953.026 - mae: 1093.288 - mean_q: 1466.934 Interval 3739 (1869000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 
-2.6266 4 episodes - episode_reward: -341.805 [-506.769, -122.445] - loss: 5495.375 - mae: 1070.489 - mean_q: 1436.653 Interval 3740 (1869500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.2999 3 episodes - episode_reward: -407.757 [-539.887, -245.923] - loss: 6206.018 - mae: 1032.358 - mean_q: 1384.168 Interval 3741 (1870000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.6090 4 episodes - episode_reward: -467.996 [-724.287, -225.250] - loss: 5131.441 - mae: 1009.562 - mean_q: 1355.445 Interval 3742 (1870500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.5995 5 episodes - episode_reward: -538.654 [-1400.557, -214.088] - loss: 5266.534 - mae: 977.112 - mean_q: 1312.334 Interval 3743 (1871000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.8072 4 episodes - episode_reward: -310.628 [-436.097, -190.627] - loss: 5834.997 - mae: 979.360 - mean_q: 1316.032 Interval 3744 (1871500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7336 4 episodes - episode_reward: -240.245 [-324.268, -148.573] - loss: 4987.857 - mae: 973.260 - mean_q: 1308.439 Interval 3745 (1872000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8462 1 episodes - episode_reward: -715.475 [-715.475, -715.475] - loss: 5055.327 - mae: 965.301 - mean_q: 1295.326 Interval 3746 (1872500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.3876 6 episodes - episode_reward: -320.442 [-698.966, -41.750] - loss: 5397.858 - mae: 947.915 - mean_q: 1270.393 Interval 3747 (1873000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3075 6 episodes - episode_reward: -283.850 [-437.952, -104.837] - loss: 4675.142 - mae: 931.240 - mean_q: 1247.189 Interval 3748 (1873500 steps performed) 500/500 [==============================] - 3s 6ms/step 
- reward: -3.3247 4 episodes - episode_reward: -379.860 [-698.922, -144.396] - loss: 4827.761 - mae: 928.284 - mean_q: 1243.892 Interval 3749 (1874000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.5742 5 episodes - episode_reward: -483.856 [-823.631, -166.831] - loss: 4984.903 - mae: 897.602 - mean_q: 1201.059 Interval 3750 (1874500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7493 5 episodes - episode_reward: -276.810 [-439.345, -162.869] - loss: 6850.229 - mae: 880.491 - mean_q: 1176.787 Interval 3751 (1875000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.7126 3 episodes - episode_reward: -457.409 [-756.900, -206.820] - loss: 5456.357 - mae: 862.468 - mean_q: 1152.781 Interval 3752 (1875500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8568 4 episodes - episode_reward: -441.669 [-703.130, -177.910] - loss: 5274.554 - mae: 841.128 - mean_q: 1124.052 Interval 3753 (1876000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1116 1 episodes - episode_reward: -717.033 [-717.033, -717.033] - loss: 6004.353 - mae: 833.154 - mean_q: 1115.203 Interval 3754 (1876500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4389 1 episodes - episode_reward: -583.377 [-583.377, -583.377] - loss: 5630.673 - mae: 843.381 - mean_q: 1130.056 Interval 3755 (1877000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1609 2 episodes - episode_reward: -588.808 [-801.533, -376.084] - loss: 5267.690 - mae: 851.626 - mean_q: 1142.876 Interval 3756 (1877500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8873 Interval 3757 (1878000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3465 2 episodes - episode_reward: -793.150 [-910.306, -675.995] - loss: 4437.708 - mae: 
852.326 - mean_q: 1144.272 Interval 3758 (1878500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5746 3 episodes - episode_reward: -410.210 [-560.968, -207.444] - loss: 4712.083 - mae: 874.795 - mean_q: 1175.198 Interval 3759 (1879000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7952 1 episodes - episode_reward: -1045.298 [-1045.298, -1045.298] - loss: 5254.450 - mae: 858.677 - mean_q: 1152.995 Interval 3760 (1879500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2338 1 episodes - episode_reward: -881.885 [-881.885, -881.885] - loss: 4837.795 - mae: 890.105 - mean_q: 1195.734 Interval 3761 (1880000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7149 1 episodes - episode_reward: -1062.212 [-1062.212, -1062.212] - loss: 5475.059 - mae: 891.005 - mean_q: 1195.588 Interval 3762 (1880500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3765 2 episodes - episode_reward: -565.831 [-600.186, -531.475] - loss: 5833.862 - mae: 892.323 - mean_q: 1197.821 Interval 3763 (1881000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5804 2 episodes - episode_reward: -351.423 [-533.202, -169.643] - loss: 5315.825 - mae: 907.823 - mean_q: 1217.307 Interval 3764 (1881500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6292 1 episodes - episode_reward: -550.322 [-550.322, -550.322] - loss: 4491.962 - mae: 932.498 - mean_q: 1250.117 Interval 3765 (1882000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9501 3 episodes - episode_reward: -378.883 [-569.747, -270.489] - loss: 5863.881 - mae: 941.096 - mean_q: 1260.923 Interval 3766 (1882500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2385 1 episodes - episode_reward: -830.451 [-830.451, -830.451] - loss: 
5275.041 - mae: 950.706 - mean_q: 1272.083 Interval 3767 (1883000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8867 Interval 3768 (1883500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1335 1 episodes - episode_reward: -608.341 [-608.341, -608.341] - loss: 5922.793 - mae: 990.554 - mean_q: 1332.440 Interval 3769 (1884000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6327 1 episodes - episode_reward: -525.776 [-525.776, -525.776] - loss: 5297.807 - mae: 1027.811 - mean_q: 1389.003 Interval 3770 (1884500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1313 1 episodes - episode_reward: -707.243 [-707.243, -707.243] - loss: 6056.190 - mae: 1059.855 - mean_q: 1432.646 Interval 3771 (1885000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9300 Interval 3772 (1885500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.6134 3 episodes - episode_reward: -528.949 [-1324.286, -100.000] - loss: 5665.022 - mae: 1132.701 - mean_q: 1534.355 Interval 3773 (1886000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5204 1 episodes - episode_reward: -961.626 [-961.626, -961.626] - loss: 7543.045 - mae: 1165.632 - mean_q: 1576.323 Interval 3774 (1886500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6265 Interval 3775 (1887000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.9284 Interval 3776 (1887500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.5092 2 episodes - episode_reward: -2382.761 [-4609.612, -155.909] - loss: 5834.929 - mae: 1249.113 - mean_q: 1689.999 Interval 3777 (1888000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.1773 Interval 3778 (1888500 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: -2.0108 1 episodes - episode_reward: -2517.139 [-2517.139, -2517.139] - loss: 6302.681 - mae: 1261.954 - mean_q: 1705.923 Interval 3779 (1889000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.5745 1 episodes - episode_reward: -2034.321 [-2034.321, -2034.321] - loss: 6195.083 - mae: 1267.595 - mean_q: 1710.831 Interval 3780 (1889500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.6154 1 episodes - episode_reward: -2376.220 [-2376.220, -2376.220] - loss: 5365.015 - mae: 1261.950 - mean_q: 1701.562 Interval 3781 (1890000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.8211 1 episodes - episode_reward: -1184.279 [-1184.279, -1184.279] - loss: 5274.560 - mae: 1222.444 - mean_q: 1644.830 Interval 3782 (1890500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0600 1 episodes - episode_reward: -481.470 [-481.470, -481.470] - loss: 5498.044 - mae: 1245.681 - mean_q: 1678.002 Interval 3783 (1891000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4352 2 episodes - episode_reward: -783.370 [-1078.767, -487.973] - loss: 7770.879 - mae: 1210.016 - mean_q: 1629.249 Interval 3784 (1891500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5618 2 episodes - episode_reward: -458.767 [-604.222, -313.312] - loss: 5620.230 - mae: 1178.960 - mean_q: 1586.319 Interval 3785 (1892000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1856 4 episodes - episode_reward: -324.610 [-568.719, -97.412] - loss: 4640.475 - mae: 1177.923 - mean_q: 1584.736 Interval 3786 (1892500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3555 2 episodes - episode_reward: -415.328 [-422.501, -408.155] - loss: 7024.787 - mae: 1172.076 - mean_q: 1576.853 Interval 
3787 (1893000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3506 2 episodes - episode_reward: -464.600 [-536.495, -392.704] - loss: 6512.186 - mae: 1177.971 - mean_q: 1583.281 Interval 3788 (1893500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.1067 6 episodes - episode_reward: -302.612 [-494.817, -175.985] - loss: 5506.503 - mae: 1165.603 - mean_q: 1564.763 Interval 3789 (1894000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4678 2 episodes - episode_reward: -341.693 [-415.382, -268.004] - loss: 9930.607 - mae: 1158.909 - mean_q: 1554.566 Interval 3790 (1894500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2815 5 episodes - episode_reward: -238.410 [-296.420, -171.038] - loss: 5730.439 - mae: 1155.994 - mean_q: 1549.625 Interval 3791 (1895000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4580 2 episodes - episode_reward: -517.486 [-539.848, -495.124] - loss: 5395.578 - mae: 1142.674 - mean_q: 1533.461 Interval 3792 (1895500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5063 4 episodes - episode_reward: -363.821 [-671.720, -109.029] - loss: 7220.422 - mae: 1136.283 - mean_q: 1523.835 Interval 3793 (1896000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1292 2 episodes - episode_reward: -271.866 [-332.108, -211.625] - loss: 10354.148 - mae: 1142.655 - mean_q: 1533.271 Interval 3794 (1896500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.6468 4 episodes - episode_reward: -211.848 [-460.931, -80.715] - loss: 6621.586 - mae: 1138.756 - mean_q: 1529.199 Interval 3795 (1897000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9117 4 episodes - episode_reward: -235.128 [-328.578, 11.811] - loss: 6616.981 - mae: 1121.318 - mean_q: 
1505.744 Interval 3796 (1897500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5816 2 episodes - episode_reward: -378.019 [-549.565, -206.472] - loss: 9181.424 - mae: 1124.815 - mean_q: 1511.619 Interval 3797 (1898000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8665 4 episodes - episode_reward: -230.293 [-317.731, -194.723] - loss: 11032.067 - mae: 1122.373 - mean_q: 1506.554 Interval 3798 (1898500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0889 5 episodes - episode_reward: -306.247 [-681.115, -127.382] - loss: 8177.660 - mae: 1111.204 - mean_q: 1492.794 Interval 3799 (1899000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6605 1 episodes - episode_reward: -332.984 [-332.984, -332.984] - loss: 6663.827 - mae: 1132.646 - mean_q: 1522.986 Interval 3800 (1899500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8674 3 episodes - episode_reward: -329.222 [-621.645, -170.363] - loss: 10313.073 - mae: 1134.539 - mean_q: 1526.820 Interval 3801 (1900000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6693 3 episodes - episode_reward: -275.094 [-594.959, -100.000] - loss: 7462.146 - mae: 1124.814 - mean_q: 1515.223 Interval 3802 (1900500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6809 1 episodes - episode_reward: -333.498 [-333.498, -333.498] - loss: 12937.164 - mae: 1163.316 - mean_q: 1568.386 Interval 3803 (1901000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.5514 1 episodes - episode_reward: -1216.178 [-1216.178, -1216.178] - loss: 7745.801 - mae: 1164.469 - mean_q: 1569.109 Interval 3804 (1901500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.2971 1 episodes - episode_reward: -1537.973 [-1537.973, -1537.973] - loss: 9707.010 
- mae: 1173.053 - mean_q: 1580.929 Interval 3805 (1902000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.2484 1 episodes - episode_reward: -2198.830 [-2198.830, -2198.830] - loss: 12382.122 - mae: 1202.624 - mean_q: 1621.435 Interval 3806 (1902500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0648 4 episodes - episode_reward: -392.795 [-756.822, -132.661] - loss: 8686.331 - mae: 1178.689 - mean_q: 1588.830 Interval 3807 (1903000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4723 1 episodes - episode_reward: -178.129 [-178.129, -178.129] - loss: 7372.556 - mae: 1192.451 - mean_q: 1608.249 Interval 3808 (1903500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.5043 2 episodes - episode_reward: -1183.878 [-1238.492, -1129.263] - loss: 6785.576 - mae: 1186.692 - mean_q: 1599.328 Interval 3809 (1904000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.6260 1 episodes - episode_reward: -1248.179 [-1248.179, -1248.179] - loss: 8398.315 - mae: 1195.720 - mean_q: 1608.657 Interval 3810 (1904500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.9477 Interval 3811 (1905000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -7.0961 1 episodes - episode_reward: -4845.103 [-4845.103, -4845.103] - loss: 6216.436 - mae: 1143.013 - mean_q: 1534.557 Interval 3812 (1905500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.4884 3 episodes - episode_reward: -713.205 [-1189.409, -144.680] - loss: 6778.659 - mae: 1127.582 - mean_q: 1511.031 Interval 3813 (1906000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0622 2 episodes - episode_reward: -631.145 [-1011.481, -250.809] - loss: 10595.741 - mae: 1110.551 - mean_q: 1486.961 Interval 3814 (1906500 steps performed) 
500/500 [==============================] - 3s 7ms/step - reward: -2.4238 2 episodes - episode_reward: -655.457 [-849.019, -461.896] - loss: 6314.544 - mae: 1081.083 - mean_q: 1450.052 Interval 3815 (1907000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -5.1217 3 episodes - episode_reward: -810.630 [-1179.279, -430.354] - loss: 6436.408 - mae: 1068.733 - mean_q: 1435.387 Interval 3816 (1907500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6929 3 episodes - episode_reward: -316.892 [-417.113, -265.674] - loss: 4939.321 - mae: 1069.273 - mean_q: 1438.618 Interval 3817 (1908000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1257 1 episodes - episode_reward: -539.284 [-539.284, -539.284] - loss: 5209.925 - mae: 1062.702 - mean_q: 1432.589 Interval 3818 (1908500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.0940 2 episodes - episode_reward: -1060.956 [-1911.114, -210.798] - loss: 5043.463 - mae: 1073.197 - mean_q: 1449.248 Interval 3819 (1909000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3019 2 episodes - episode_reward: -527.215 [-935.036, -119.393] - loss: 5252.542 - mae: 1070.205 - mean_q: 1443.496 Interval 3820 (1909500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4244 1 episodes - episode_reward: -912.893 [-912.893, -912.893] - loss: 6250.887 - mae: 1087.284 - mean_q: 1466.747 Interval 3821 (1910000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.6143 2 episodes - episode_reward: -931.191 [-1505.243, -357.140] - loss: 6124.292 - mae: 1075.860 - mean_q: 1449.900 Interval 3822 (1910500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -8.5424 1 episodes - episode_reward: -5095.123 [-5095.123, -5095.123] - loss: 4701.450 - mae: 1049.673 - mean_q: 1415.875 Interval 3823 
(1911000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0079 5 episodes - episode_reward: -210.068 [-278.561, -90.619] - loss: 5227.418 - mae: 1056.206 - mean_q: 1423.474 Interval 3824 (1911500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.8254 1 episodes - episode_reward: -2103.112 [-2103.112, -2103.112] - loss: 4814.175 - mae: 1020.723 - mean_q: 1378.141 Interval 3825 (1912000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.4877 3 episodes - episode_reward: -998.071 [-2729.209, -100.000] - loss: 4921.140 - mae: 1010.833 - mean_q: 1361.548 Interval 3826 (1912500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.5903 1 episodes - episode_reward: -2781.413 [-2781.413, -2781.413] - loss: 4561.654 - mae: 1012.725 - mean_q: 1365.197 Interval 3827 (1913000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1231 4 episodes - episode_reward: -395.938 [-1129.303, -119.904] - loss: 5132.580 - mae: 998.606 - mean_q: 1342.988 Interval 3828 (1913500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4792 2 episodes - episode_reward: -673.382 [-1186.191, -160.574] - loss: 4650.266 - mae: 983.899 - mean_q: 1322.175 Interval 3829 (1914000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.4472 2 episodes - episode_reward: -871.753 [-1395.776, -347.730] - loss: 4096.549 - mae: 962.006 - mean_q: 1290.820 Interval 3830 (1914500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -6.0714 1 episodes - episode_reward: -3359.986 [-3359.986, -3359.986] - loss: 4638.689 - mae: 955.599 - mean_q: 1280.353 Interval 3831 (1915000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5746 4 episodes - episode_reward: -444.288 [-711.509, -225.789] - loss: 4155.214 - mae: 927.223 - mean_q: 
1239.628 Interval 3832 (1915500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.6435 1 episodes - episode_reward: -1611.065 [-1611.065, -1611.065] - loss: 4180.058 - mae: 906.703 - mean_q: 1209.158 Interval 3833 (1916000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1365 3 episodes - episode_reward: -419.569 [-681.007, -262.702] - loss: 3401.551 - mae: 887.860 - mean_q: 1180.315 Interval 3834 (1916500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2968 3 episodes - episode_reward: -368.410 [-694.256, -141.774] - loss: 3778.629 - mae: 857.151 - mean_q: 1135.987 Interval 3835 (1917000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4141 Interval 3836 (1917500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.7367 2 episodes - episode_reward: -1591.574 [-2381.026, -802.123] - loss: 4359.119 - mae: 835.203 - mean_q: 1101.227 Interval 3837 (1918000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8204 1 episodes - episode_reward: -411.904 [-411.904, -411.904] - loss: 3721.080 - mae: 813.057 - mean_q: 1075.766 Interval 3838 (1918500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1012 1 episodes - episode_reward: -1366.955 [-1366.955, -1366.955] - loss: 3518.581 - mae: 799.978 - mean_q: 1055.866 Interval 3839 (1919000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2391 2 episodes - episode_reward: -343.516 [-365.530, -321.503] - loss: 3553.231 - mae: 798.142 - mean_q: 1053.329 Interval 3840 (1919500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1264 4 episodes - episode_reward: -306.149 [-405.350, -161.830] - loss: 3733.123 - mae: 791.486 - mean_q: 1043.652 Interval 3841 (1920000 steps performed) 500/500 [==============================] - 4s 
7ms/step - reward: -0.7263 1 episodes - episode_reward: -201.763 [-201.763, -201.763] - loss: 3882.582 - mae: 796.040 - mean_q: 1042.494 Interval 3842 (1920500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8055 3 episodes - episode_reward: -258.231 [-438.952, -100.000] - loss: 3725.805 - mae: 790.849 - mean_q: 1035.481 Interval 3843 (1921000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.1296 5 episodes - episode_reward: -371.410 [-614.770, -178.234] - loss: 3634.821 - mae: 788.156 - mean_q: 1029.770 Interval 3844 (1921500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8831 3 episodes - episode_reward: -315.395 [-542.105, -191.781] - loss: 3854.220 - mae: 797.920 - mean_q: 1042.970 Interval 3845 (1922000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.7511 5 episodes - episode_reward: -276.015 [-478.110, -134.529] - loss: 4191.303 - mae: 799.038 - mean_q: 1045.116 Interval 3846 (1922500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7742 Interval 3847 (1923000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6800 4 episodes - episode_reward: -314.875 [-472.849, -101.406] - loss: 3930.406 - mae: 841.162 - mean_q: 1109.206 Interval 3848 (1923500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.3472 Interval 3849 (1924000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -5.2907 3 episodes - episode_reward: -1093.343 [-2335.126, -113.811] - loss: 4062.265 - mae: 862.286 - mean_q: 1137.169 Interval 3850 (1924500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7039 1 episodes - episode_reward: -725.265 [-725.265, -725.265] - loss: 3883.600 - mae: 883.284 - mean_q: 1166.604 Interval 3851 (1925000 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -0.2173 Interval 3852 (1925500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.4663 1 episodes - episode_reward: -1290.969 [-1290.969, -1290.969] - loss: 4054.608 - mae: 895.304 - mean_q: 1180.590 Interval 3853 (1926000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2806 1 episodes - episode_reward: -146.341 [-146.341, -146.341] - loss: 3744.229 - mae: 888.456 - mean_q: 1172.447 Interval 3854 (1926500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5072 1 episodes - episode_reward: -319.829 [-319.829, -319.829] - loss: 3723.915 - mae: 881.021 - mean_q: 1159.351 Interval 3855 (1927000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.6435 1 episodes - episode_reward: -1319.814 [-1319.814, -1319.814] - loss: 3479.937 - mae: 871.115 - mean_q: 1141.595 Interval 3856 (1927500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.3483 3 episodes - episode_reward: -747.213 [-1474.910, -346.060] - loss: 3315.750 - mae: 861.231 - mean_q: 1124.356 Interval 3857 (1928000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1475 6 episodes - episode_reward: -161.222 [-300.312, -100.000] - loss: 3401.658 - mae: 840.976 - mean_q: 1092.472 Interval 3858 (1928500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3959 1 episodes - episode_reward: -849.206 [-849.206, -849.206] - loss: 3336.031 - mae: 822.923 - mean_q: 1066.072 Interval 3859 (1929000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.7851 6 episodes - episode_reward: -383.809 [-746.048, -117.393] - loss: 3081.241 - mae: 804.853 - mean_q: 1038.669 Interval 3860 (1929500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.2289 5 episodes - episode_reward: 
-344.256 [-625.991, -156.631] - loss: 3308.874 - mae: 778.994 - mean_q: 1002.640 Interval 3861 (1930000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.7494 4 episodes - episode_reward: -583.059 [-983.988, -132.876] - loss: 3087.780 - mae: 756.793 - mean_q: 970.192 Interval 3862 (1930500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0054 5 episodes - episode_reward: -273.846 [-436.983, -94.889] - loss: 3190.115 - mae: 739.521 - mean_q: 945.586 Interval 3863 (1931000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.0483 4 episodes - episode_reward: -540.293 [-1045.460, -299.903] - loss: 3197.289 - mae: 723.018 - mean_q: 924.389 Interval 3864 (1931500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.5525 5 episodes - episode_reward: -453.428 [-955.021, -195.262] - loss: 2929.157 - mae: 706.385 - mean_q: 897.898 Interval 3865 (1932000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.4719 5 episodes - episode_reward: -441.641 [-593.208, -373.770] - loss: 2998.935 - mae: 684.466 - mean_q: 867.060 Interval 3866 (1932500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.9783 4 episodes - episode_reward: -506.059 [-669.713, -356.580] - loss: 2980.059 - mae: 669.595 - mean_q: 844.530 Interval 3867 (1933000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.5693 3 episodes - episode_reward: -699.866 [-1169.348, -129.150] - loss: 3119.923 - mae: 653.643 - mean_q: 819.135 Interval 3868 (1933500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.2222 3 episodes - episode_reward: -734.592 [-1011.871, -468.760] - loss: 2846.298 - mae: 630.693 - mean_q: 786.871 Interval 3869 (1934000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.1841 3 episodes - 
episode_reward: -604.060 [-723.055, -480.279] - loss: 3118.018 - mae: 622.063 - mean_q: 780.921 Interval 3870 (1934500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.3255 7 episodes - episode_reward: -328.681 [-775.866, -100.000] - loss: 3048.760 - mae: 595.584 - mean_q: 745.248 Interval 3871 (1935000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.2318 4 episodes - episode_reward: -528.314 [-1132.985, -136.486] - loss: 2825.097 - mae: 583.645 - mean_q: 732.198 Interval 3872 (1935500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.4725 5 episodes - episode_reward: -499.228 [-795.450, -285.522] - loss: 3292.458 - mae: 578.925 - mean_q: 731.136 Interval 3873 (1936000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.4944 4 episodes - episode_reward: -563.109 [-850.547, -350.839] - loss: 3728.799 - mae: 585.420 - mean_q: 742.970 Interval 3874 (1936500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3092 2 episodes - episode_reward: -560.080 [-699.398, -420.761] - loss: 3312.409 - mae: 596.859 - mean_q: 760.166 Interval 3875 (1937000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7020 4 episodes - episode_reward: -338.392 [-439.707, -244.946] - loss: 3954.938 - mae: 626.849 - mean_q: 804.949 Interval 3876 (1937500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.5073 3 episodes - episode_reward: -558.521 [-804.150, -201.964] - loss: 4610.972 - mae: 667.942 - mean_q: 866.760 Interval 3877 (1938000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9306 4 episodes - episode_reward: -377.692 [-862.033, -97.040] - loss: 4939.227 - mae: 703.881 - mean_q: 917.389 Interval 3878 (1938500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.3474 4 episodes - 
episode_reward: -550.073 [-1176.731, -144.463] - loss: 5919.944 - mae: 746.499 - mean_q: 975.982 Interval 3879 (1939000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0199 1 episodes - episode_reward: -874.793 [-874.793, -874.793] - loss: 6586.230 - mae: 802.417 - mean_q: 1050.914 Interval 3880 (1939500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0063 3 episodes - episode_reward: -471.566 [-665.952, -360.667] - loss: 8280.236 - mae: 884.536 - mean_q: 1162.134 Interval 3881 (1940000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.4215 2 episodes - episode_reward: -615.010 [-668.076, -561.944] - loss: 9842.579 - mae: 942.741 - mean_q: 1229.650 Interval 3882 (1940500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.2995 5 episodes - episode_reward: -466.727 [-1094.579, -104.667] - loss: 9948.778 - mae: 975.928 - mean_q: 1271.540 Interval 3883 (1941000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.4533 3 episodes - episode_reward: -743.708 [-1424.744, -286.020] - loss: 9368.131 - mae: 991.441 - mean_q: 1288.738 Interval 3884 (1941500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.6148 5 episodes - episode_reward: -247.053 [-496.386, -122.059] - loss: 9771.994 - mae: 1022.121 - mean_q: 1328.683 Interval 3885 (1942000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.6889 3 episodes - episode_reward: -710.149 [-1182.523, -350.665] - loss: 10686.001 - mae: 1013.560 - mean_q: 1318.119 Interval 3886 (1942500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1407 6 episodes - episode_reward: -305.905 [-489.854, -182.961] - loss: 9940.950 - mae: 1026.324 - mean_q: 1336.970 Interval 3887 (1943000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
-3.2802 2 episodes - episode_reward: -809.109 [-825.803, -792.416] - loss: 10330.597 - mae: 1054.115 - mean_q: 1378.009 Interval 3888 (1943500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.4531 5 episodes - episode_reward: -436.699 [-1123.917, -100.000] - loss: 10678.253 - mae: 1083.702 - mean_q: 1424.404 Interval 3889 (1944000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5271 3 episodes - episode_reward: -330.556 [-445.850, -143.497] - loss: 9710.135 - mae: 1125.160 - mean_q: 1486.741 Interval 3890 (1944500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3895 4 episodes - episode_reward: -523.341 [-713.693, -380.891] - loss: 10424.503 - mae: 1146.745 - mean_q: 1520.787 Interval 3891 (1945000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.9362 5 episodes - episode_reward: -392.120 [-832.072, -190.571] - loss: 10225.930 - mae: 1183.381 - mean_q: 1570.199 Interval 3892 (1945500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2032 3 episodes - episode_reward: -418.161 [-460.148, -372.630] - loss: 10342.837 - mae: 1235.180 - mean_q: 1641.577 Interval 3893 (1946000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.6278 5 episodes - episode_reward: -533.196 [-823.162, -384.083] - loss: 10064.451 - mae: 1265.893 - mean_q: 1686.619 Interval 3894 (1946500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6337 2 episodes - episode_reward: -502.396 [-561.996, -442.795] - loss: 10876.890 - mae: 1247.182 - mean_q: 1654.642 Interval 3895 (1947000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.0695 1 episodes - episode_reward: -2162.767 [-2162.767, -2162.767] - loss: 9823.347 - mae: 1265.779 - mean_q: 1674.980 Interval 3896 (1947500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -4.0992 4 episodes - episode_reward: -474.322 [-784.529, -124.966] - loss: 10706.736 - mae: 1278.139 - mean_q: 1679.553 Interval 3897 (1948000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2188 4 episodes - episode_reward: -291.037 [-581.270, -122.224] - loss: 10419.935 - mae: 1288.954 - mean_q: 1692.172 Interval 3898 (1948500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4335 4 episodes - episode_reward: -497.016 [-907.891, -268.908] - loss: 10487.806 - mae: 1307.244 - mean_q: 1711.918 Interval 3899 (1949000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1331 4 episodes - episode_reward: -269.360 [-500.083, -102.134] - loss: 11662.038 - mae: 1300.066 - mean_q: 1700.203 Interval 3900 (1949500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5318 6 episodes - episode_reward: -208.167 [-445.545, -83.634] - loss: 11785.288 - mae: 1320.479 - mean_q: 1723.938 Interval 3901 (1950000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.8983 7 episodes - episode_reward: -280.146 [-566.104, -129.051] - loss: 11690.359 - mae: 1332.652 - mean_q: 1741.228 Interval 3902 (1950500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9395 4 episodes - episode_reward: -349.798 [-487.569, -261.762] - loss: 13086.422 - mae: 1342.257 - mean_q: 1754.139 Interval 3903 (1951000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.7582 5 episodes - episode_reward: -378.228 [-501.827, -100.000] - loss: 12899.250 - mae: 1347.078 - mean_q: 1766.023 Interval 3904 (1951500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8002 5 episodes - episode_reward: -253.793 [-357.492, -159.046] - loss: 13360.773 - mae: 1349.924 - mean_q: 1768.148 Interval 3905 (1952000 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.8538 5 episodes - episode_reward: -423.792 [-641.336, -190.000] - loss: 16870.586 - mae: 1383.921 - mean_q: 1811.865 Interval 3906 (1952500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5350 6 episodes - episode_reward: -296.412 [-470.153, -114.866] - loss: 16596.207 - mae: 1366.792 - mean_q: 1786.188 Interval 3907 (1953000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9453 1 episodes - episode_reward: -1459.558 [-1459.558, -1459.558] - loss: 16567.623 - mae: 1335.542 - mean_q: 1739.666 Interval 3908 (1953500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9104 4 episodes - episode_reward: -370.400 [-472.029, -133.017] - loss: 17661.699 - mae: 1290.076 - mean_q: 1672.387 Interval 3909 (1954000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5468 3 episodes - episode_reward: -414.485 [-820.659, -44.202] - loss: 17170.598 - mae: 1246.593 - mean_q: 1607.203 Interval 3910 (1954500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3336 2 episodes - episode_reward: -416.085 [-524.735, -307.435] - loss: 17827.572 - mae: 1214.381 - mean_q: 1560.947 Interval 3911 (1955000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.7947 4 episodes - episode_reward: -318.453 [-554.518, -135.440] - loss: 17229.254 - mae: 1210.256 - mean_q: 1561.069 Interval 3912 (1955500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7225 5 episodes - episode_reward: -458.040 [-1165.716, -115.090] - loss: 18909.559 - mae: 1193.661 - mean_q: 1537.965 Interval 3913 (1956000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.4624 2 episodes - episode_reward: -836.311 [-1037.441, -635.182] - loss: 18427.309 - mae: 1161.893 - mean_q: 
1497.050 Interval 3914 (1956500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.1634 3 episodes - episode_reward: -706.148 [-1297.168, -273.790] - loss: 17765.641 - mae: 1109.878 - mean_q: 1436.204 Interval 3915 (1957000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6575 6 episodes - episode_reward: -224.944 [-793.264, -98.294] - loss: 16055.027 - mae: 1093.001 - mean_q: 1421.609 Interval 3916 (1957500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.2501 4 episodes - episode_reward: -540.212 [-733.189, -323.973] - loss: 15212.752 - mae: 1081.245 - mean_q: 1411.146 Interval 3917 (1958000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.8789 3 episodes - episode_reward: -479.388 [-743.433, -262.337] - loss: 17153.975 - mae: 1074.750 - mean_q: 1412.772 Interval 3918 (1958500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.7668 3 episodes - episode_reward: -867.211 [-1583.236, -185.190] - loss: 22996.396 - mae: 1096.982 - mean_q: 1448.879 Interval 3919 (1959000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8139 4 episodes - episode_reward: -372.416 [-833.767, -181.827] - loss: 18492.203 - mae: 1123.314 - mean_q: 1491.781 Interval 3920 (1959500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5708 3 episodes - episode_reward: -560.498 [-736.338, -350.311] - loss: 22846.924 - mae: 1167.298 - mean_q: 1549.090 Interval 3921 (1960000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -5.1663 4 episodes - episode_reward: -712.283 [-1166.855, -254.430] - loss: 33612.156 - mae: 1200.925 - mean_q: 1586.284 Interval 3922 (1960500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2865 3 episodes - episode_reward: -366.290 [-558.836, -175.487] - loss: 
27236.523 - mae: 1192.396 - mean_q: 1564.115 Interval 3923 (1961000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1589 3 episodes - episode_reward: -385.559 [-466.665, -337.931] - loss: 22893.961 - mae: 1188.100 - mean_q: 1552.880 Interval 3924 (1961500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4970 3 episodes - episode_reward: -243.307 [-318.598, -107.112] - loss: 22382.891 - mae: 1209.829 - mean_q: 1586.527 Interval 3925 (1962000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.4949 5 episodes - episode_reward: -340.637 [-690.599, -90.690] - loss: 18879.281 - mae: 1206.540 - mean_q: 1583.692 Interval 3926 (1962500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.0584 7 episodes - episode_reward: -302.230 [-482.541, -113.226] - loss: 21328.068 - mae: 1217.168 - mean_q: 1600.197 Interval 3927 (1963000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9133 4 episodes - episode_reward: -361.520 [-798.942, -159.878] - loss: 21171.701 - mae: 1214.197 - mean_q: 1599.845 Interval 3928 (1963500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.4907 3 episodes - episode_reward: -652.049 [-1345.379, -126.101] - loss: 19313.184 - mae: 1174.168 - mean_q: 1548.870 Interval 3929 (1964000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.7755 5 episodes - episode_reward: -322.027 [-455.579, -134.971] - loss: 20660.766 - mae: 1162.922 - mean_q: 1532.677 Interval 3930 (1964500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.2675 1 episodes - episode_reward: -567.963 [-567.963, -567.963] - loss: 18480.086 - mae: 1131.102 - mean_q: 1489.671 Interval 3931 (1965000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4958 1 episodes - episode_reward: -680.342 
[-680.342, -680.342] - loss: 17987.658 - mae: 1099.266 - mean_q: 1444.090 Interval 3932 (1965500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.6744 3 episodes - episode_reward: -588.841 [-1283.382, -100.000] - loss: 17217.166 - mae: 1102.685 - mean_q: 1452.695 Interval 3933 (1966000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.2756 3 episodes - episode_reward: -624.095 [-1322.408, -106.304] - loss: 18499.428 - mae: 1104.800 - mean_q: 1461.043 Interval 3934 (1966500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.3152 6 episodes - episode_reward: -270.383 [-394.162, -124.924] - loss: 17438.902 - mae: 1111.396 - mean_q: 1477.828 Interval 3935 (1967000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.8353 6 episodes - episode_reward: -212.477 [-354.519, -67.462] - loss: 17486.963 - mae: 1145.936 - mean_q: 1534.706 Interval 3936 (1967500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.3324 7 episodes - episode_reward: -301.844 [-693.727, -95.571] - loss: 20207.787 - mae: 1192.155 - mean_q: 1609.953 Interval 3937 (1968000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1536 6 episodes - episode_reward: -304.000 [-471.184, -140.139] - loss: 23675.842 - mae: 1263.643 - mean_q: 1718.451 Interval 3938 (1968500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.2085 5 episodes - episode_reward: -400.206 [-604.111, -98.854] - loss: 23746.584 - mae: 1346.723 - mean_q: 1833.668 Interval 3939 (1969000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9155 3 episodes - episode_reward: -446.550 [-824.984, -100.000] - loss: 24302.990 - mae: 1405.110 - mean_q: 1908.656 Interval 3940 (1969500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.9516 4 episodes - 
episode_reward: -423.334 [-670.515, -272.659] - loss: 29134.387 - mae: 1415.912 - mean_q: 1909.043 Interval 3941 (1970000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.3079 3 episodes - episode_reward: -559.258 [-1351.874, -132.051] - loss: 25234.129 - mae: 1390.061 - mean_q: 1869.224 Interval 3942 (1970500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1942 4 episodes - episode_reward: -273.071 [-382.109, -144.088] - loss: 22514.889 - mae: 1348.311 - mean_q: 1816.558 Interval 3943 (1971000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8580 6 episodes - episode_reward: -145.863 [-199.799, -84.803] - loss: 23409.076 - mae: 1329.260 - mean_q: 1791.192 Interval 3944 (1971500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0309 5 episodes - episode_reward: -298.862 [-504.266, -153.290] - loss: 23156.172 - mae: 1292.475 - mean_q: 1740.633 Interval 3945 (1972000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -5.0970 4 episodes - episode_reward: -668.288 [-983.846, -285.743] - loss: 21744.832 - mae: 1256.959 - mean_q: 1695.601 Interval 3946 (1972500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6948 1 episodes - episode_reward: -767.368 [-767.368, -767.368] - loss: 22291.074 - mae: 1275.785 - mean_q: 1725.190 Interval 3947 (1973000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0278 4 episodes - episode_reward: -392.708 [-968.220, -110.867] - loss: 22735.006 - mae: 1276.067 - mean_q: 1721.866 Interval 3948 (1973500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0989 1 episodes - episode_reward: -397.775 [-397.775, -397.775] - loss: 24635.748 - mae: 1303.957 - mean_q: 1766.494 Interval 3949 (1974000 steps performed) 500/500 [==============================] - 4s 7ms/step - 
reward: -2.0414 3 episodes - episode_reward: -399.729 [-641.646, -124.327] - loss: 26219.910 - mae: 1301.250 - mean_q: 1758.614 Interval 3950 (1974500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5659 2 episodes - episode_reward: -341.837 [-504.719, -178.956] - loss: 25753.766 - mae: 1298.199 - mean_q: 1750.996 Interval 3951 (1975000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8979 4 episodes - episode_reward: -237.693 [-282.393, -134.457] - loss: 31605.301 - mae: 1318.193 - mean_q: 1781.280 Interval 3952 (1975500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0751 4 episodes - episode_reward: -263.562 [-516.092, -127.703] - loss: 26332.514 - mae: 1369.997 - mean_q: 1846.640 Interval 3953 (1976000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9899 3 episodes - episode_reward: -486.280 [-711.106, -244.221] - loss: 30623.354 - mae: 1335.548 - mean_q: 1799.855 Interval 3954 (1976500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.4721 3 episodes - episode_reward: -362.552 [-393.677, -311.197] - loss: 34662.855 - mae: 1377.746 - mean_q: 1859.775 Interval 3955 (1977000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.1889 5 episodes - episode_reward: -335.685 [-377.089, -288.099] - loss: 33193.723 - mae: 1366.445 - mean_q: 1842.517 Interval 3956 (1977500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.2438 4 episodes - episode_reward: -428.316 [-558.101, -295.498] - loss: 32962.977 - mae: 1345.609 - mean_q: 1813.456 Interval 3957 (1978000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.0185 5 episodes - episode_reward: -181.631 [-330.401, -96.719] - loss: 33012.840 - mae: 1341.040 - mean_q: 1809.777 Interval 3958 (1978500 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -1.8114 3 episodes - episode_reward: -412.582 [-774.521, -181.083] - loss: 34119.773 - mae: 1348.887 - mean_q: 1821.998 Interval 3959 (1979000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4534 1 episodes - episode_reward: -436.794 [-436.794, -436.794] - loss: 42564.926 - mae: 1352.147 - mean_q: 1824.483 Interval 3960 (1979500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6523 3 episodes - episode_reward: -289.512 [-432.159, -138.064] - loss: 44735.711 - mae: 1374.702 - mean_q: 1856.868 Interval 3961 (1980000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2832 2 episodes - episode_reward: -309.917 [-418.982, -200.853] - loss: 42342.332 - mae: 1381.278 - mean_q: 1867.409 Interval 3962 (1980500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0188 2 episodes - episode_reward: -271.157 [-297.221, -245.094] - loss: 37859.621 - mae: 1384.710 - mean_q: 1871.973 Interval 3963 (1981000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2619 2 episodes - episode_reward: -312.878 [-333.761, -291.994] - loss: 37456.020 - mae: 1395.023 - mean_q: 1885.466 Interval 3964 (1981500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9209 2 episodes - episode_reward: -239.112 [-251.923, -226.301] - loss: 32746.299 - mae: 1419.980 - mean_q: 1920.547 Interval 3965 (1982000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8842 1 episodes - episode_reward: -564.290 [-564.290, -564.290] - loss: 34266.551 - mae: 1477.210 - mean_q: 1999.810 Interval 3966 (1982500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7993 3 episodes - episode_reward: -243.761 [-287.122, -195.045] - loss: 38830.043 - mae: 1548.283 - mean_q: 2093.563 Interval 3967 (1983000 
steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9376 5 episodes - episode_reward: -216.326 [-316.581, -163.421] - loss: 40152.191 - mae: 1589.523 - mean_q: 2147.544 Interval 3968 (1983500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1552 3 episodes - episode_reward: -180.344 [-203.140, -158.800] - loss: 37851.516 - mae: 1630.165 - mean_q: 2200.805 Interval 3969 (1984000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0112 1 episodes - episode_reward: -506.720 [-506.720, -506.720] - loss: 36328.641 - mae: 1650.578 - mean_q: 2229.607 Interval 3970 (1984500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3987 4 episodes - episode_reward: -171.744 [-209.837, -129.614] - loss: 38026.105 - mae: 1673.058 - mean_q: 2261.922 Interval 3971 (1985000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8509 3 episodes - episode_reward: -308.478 [-510.261, -114.406] - loss: 37101.438 - mae: 1707.200 - mean_q: 2317.200 Interval 3972 (1985500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2308 Interval 3973 (1986000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2387 Interval 3974 (1986500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.3860 3 episodes - episode_reward: -313.022 [-634.590, -119.473] - loss: 32046.568 - mae: 1795.450 - mean_q: 2433.787 Interval 3975 (1987000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5643 3 episodes - episode_reward: -241.279 [-284.728, -203.904] - loss: 30346.490 - mae: 1763.358 - mean_q: 2381.973 Interval 3976 (1987500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2031 3 episodes - episode_reward: -335.431 [-381.371, -299.389] - loss: 31844.533 - mae: 1742.086 - mean_q: 
2350.260 Interval 3977 (1988000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9856 2 episodes - episode_reward: -277.757 [-315.482, -240.031] - loss: 33155.102 - mae: 1764.823 - mean_q: 2374.675 Interval 3978 (1988500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7910 4 episodes - episode_reward: -373.654 [-621.087, -240.658] - loss: 30305.885 - mae: 1695.284 - mean_q: 2277.921 Interval 3979 (1989000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4175 3 episodes - episode_reward: -234.670 [-269.853, -188.635] - loss: 33778.832 - mae: 1681.075 - mean_q: 2260.703 Interval 3980 (1989500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.2067 6 episodes - episode_reward: -432.664 [-1617.802, -130.789] - loss: 28771.492 - mae: 1599.691 - mean_q: 2154.664 Interval 3981 (1990000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1205 5 episodes - episode_reward: -212.255 [-263.195, -78.319] - loss: 27937.236 - mae: 1583.196 - mean_q: 2135.711 Interval 3982 (1990500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.6954 2 episodes - episode_reward: -1351.866 [-2471.546, -232.185] - loss: 33393.520 - mae: 1563.593 - mean_q: 2112.008 Interval 3983 (1991000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8268 5 episodes - episode_reward: -207.211 [-321.188, -149.692] - loss: 35876.164 - mae: 1593.456 - mean_q: 2155.389 Interval 3984 (1991500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6972 1 episodes - episode_reward: -238.766 [-238.766, -238.766] - loss: 35256.941 - mae: 1602.284 - mean_q: 2173.516 Interval 3985 (1992000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6476 4 episodes - episode_reward: -354.992 [-524.156, -202.088] - loss: 
40721.637 - mae: 1694.644 - mean_q: 2299.782 Interval 3986 (1992500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3841 4 episodes - episode_reward: -169.709 [-204.856, -137.817] - loss: 37268.848 - mae: 1733.242 - mean_q: 2352.937 Interval 3987 (1993000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8160 2 episodes - episode_reward: -237.136 [-247.539, -226.734] - loss: 44566.773 - mae: 1777.467 - mean_q: 2407.380 Interval 3988 (1993500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3307 4 episodes - episode_reward: -157.577 [-285.337, -85.446] - loss: 41336.016 - mae: 1820.116 - mean_q: 2463.764 Interval 3989 (1994000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1622 Interval 3990 (1994500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0176 2 episodes - episode_reward: -298.604 [-439.022, -158.186] - loss: 36842.070 - mae: 1955.351 - mean_q: 2651.236 Interval 3991 (1995000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0058 1 episodes - episode_reward: -196.500 [-196.500, -196.500] - loss: 40634.930 - mae: 2066.208 - mean_q: 2803.049 Interval 3992 (1995500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4822 2 episodes - episode_reward: -494.268 [-669.957, -318.579] - loss: 43213.895 - mae: 2126.417 - mean_q: 2885.053 Interval 3993 (1996000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7768 1 episodes - episode_reward: -434.087 [-434.087, -434.087] - loss: 47107.844 - mae: 2216.402 - mean_q: 3008.144 Interval 3994 (1996500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.4117 4 episodes - episode_reward: -398.587 [-514.315, -218.769] - loss: 44606.969 - mae: 2243.639 - mean_q: 3052.658 Interval 3995 (1997000 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: -1.0872 1 episodes - episode_reward: -270.604 [-270.604, -270.604] - loss: 49619.430 - mae: 2358.760 - mean_q: 3210.528 Interval 3996 (1997500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5102 3 episodes - episode_reward: -356.579 [-660.682, -67.592] - loss: 50961.574 - mae: 2415.370 - mean_q: 3285.600 Interval 3997 (1998000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9570 2 episodes - episode_reward: -226.081 [-289.248, -162.914] - loss: 50313.656 - mae: 2428.350 - mean_q: 3308.075 Interval 3998 (1998500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0194 5 episodes - episode_reward: -210.142 [-293.627, -120.528] - loss: 48021.801 - mae: 2563.596 - mean_q: 3500.902 Interval 3999 (1999000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5356 4 episodes - episode_reward: -337.911 [-447.905, -163.559] - loss: 54349.191 - mae: 2691.017 - mean_q: 3680.055 Interval 4000 (1999500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4137 done, took 15834.979 seconds
# Record this run's weight file and keep the trained agent for later comparison.
weights.append('dqn_lunar_weights_five.h5f')  # plain string: nothing to interpolate
models.append(dqn)
models[-1].load_weights(weights[-1])

# Plot raw per-episode rewards (light gray) with a 50-episode rolling mean
# (black) overlaid to show the trend.
df = pd.DataFrame(history.history)
ax = df['episode_reward'].plot(color = 'lightgray')
df['episode_reward'].rolling(50).mean().plot(color = 'black')
ax.set_xlabel("Episode")
plt.ylabel("Rolling Mean (50) Cumulative Return")  # label fixed: window is 50, not 10
plt.show()
The longer training time didn't do much for the model. The average has remained pretty steady, and we still see some big negative numbers even closer to the end of training.
df.to_csv('lunar_training_weights_5')
I decided to see if longer training would have a similar result with a window size of 4.
# Log model 6's configuration: window 4, a 128/64/32 dense stack,
# 2,000,000 training steps, 50,000-transition memory, log interval 500.
rl['Model 6'] = [4, '128/64/32', 2000000, 50000, 500, None]

# Build the Q-network: flatten the 4-frame observation window (input_shape of
# four mirrors the window length), then a ReLU stack of shrinking widths,
# finishing with a linear output over the available actions.
model = Sequential()
model.add(Flatten(input_shape=(4,) + env.observation_space.shape))
for width in (128, 64, 32):
    model.add(Dense(width))
    model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
Model: "sequential_41" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= flatten_40 (Flatten) (None, 32) 0 _________________________________________________________________ dense_160 (Dense) (None, 128) 4224 _________________________________________________________________ activation_160 (Activation) (None, 128) 0 _________________________________________________________________ dense_161 (Dense) (None, 64) 8256 _________________________________________________________________ activation_161 (Activation) (None, 64) 0 _________________________________________________________________ dense_162 (Dense) (None, 32) 2080 _________________________________________________________________ activation_162 (Activation) (None, 32) 0 _________________________________________________________________ dense_163 (Dense) (None, 4) 132 _________________________________________________________________ activation_163 (Activation) (None, 4) 0 ================================================================= Total params: 14,692 Trainable params: 14,692 Non-trainable params: 0 _________________________________________________________________ None
# Replay buffer keeps the most recent 50k transitions; window_length=4
# matches the 4-frame input shape of the network above.
memory = SequentialMemory(limit=50000, window_length=4)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=30,
target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
# weights_filename = 'dqn_lunar_weights_six.h5f'
checkpoint_weights_filename = 'dqn_lunar_weights_{step}.h5f'  # '{step}' is filled in by the checkpoint callback, not by us
log_filename = 'dqn_lunar_log.json'  # plain string: nothing to interpolate
# Checkpoint the weights every 250k steps and log metrics every 100 steps.
callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
callbacks += [FileLogger(log_filename, interval=100)]
# Train for 2M steps, timing the whole run for the results table.
start_time = time.time()
history = dqn.fit(env, callbacks=callbacks, nb_steps=2000000, log_interval=500)
end_time = time.time()
Training for 2000000 steps ... Interval 1 (0 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1308 4 episodes - episode_reward: -368.012 [-651.725, -21.285] - loss: 46.666 - mae: 9.730 - mean_q: -4.722 Interval 2 (500 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -2.4911 5 episodes - episode_reward: -263.724 [-495.612, -151.052] - loss: 36.090 - mae: 12.137 - mean_q: -8.407 Interval 3 (1000 steps performed) 500/500 [==============================] - 2s 4ms/step - reward: -3.3767 5 episodes - episode_reward: -330.986 [-615.214, -97.230] - loss: 26.944 - mae: 13.228 - mean_q: -9.012 Interval 4 (1500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.0701 4 episodes - episode_reward: -272.253 [-398.641, -180.586] - loss: 15.947 - mae: 16.375 - mean_q: -13.281 Interval 5 (2000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2415 4 episodes - episode_reward: -163.620 [-334.278, -75.177] - loss: 15.445 - mae: 18.128 - mean_q: -13.589 Interval 6 (2500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.0436 3 episodes - episode_reward: -123.567 [-190.305, -78.664] - loss: 11.695 - mae: 18.687 - mean_q: -13.149 Interval 7 (3000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6956 3 episodes - episode_reward: -167.590 [-181.927, -155.204] - loss: 13.361 - mae: 19.488 - mean_q: -12.554 Interval 8 (3500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2157 3 episodes - episode_reward: -185.374 [-291.185, -96.950] - loss: 10.908 - mae: 21.246 - mean_q: -12.926 Interval 9 (4000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.2096 3 episodes - episode_reward: -205.619 [-274.586, -137.702] - loss: 8.629 - mae: 22.988 - mean_q: -13.465 Interval 10 (4500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.1631 Interval 11 (5000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.4775 2 episodes - episode_reward: -165.998 [-210.863, -121.132] - loss: 6.905 - mae: 25.592 - mean_q: -8.589 Interval 12 (5500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0516 Interval 13 (6000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2382 Interval 14 (6500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0676 Interval 15 (7000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1840 Interval 16 (7500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.1560 Interval 17 (8000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.1521 Interval 18 (8500 steps performed) 500/500 [==============================] - 9s 17ms/step - reward: -0.1678 Interval 19 (9000 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.1680 Interval 20 (9500 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.2162 Interval 21 (10000 steps performed) 500/500 [==============================] - 12s 23ms/step - reward: -0.1340 Interval 22 (10500 steps performed) 500/500 [==============================] - 12s 24ms/step - reward: -0.2538 Interval 23 (11000 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -0.1313 Interval 24 (11500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.1905 Interval 25 (12000 steps performed) 500/500 [==============================] - 17s 34ms/step - reward: -0.2051 Interval 26 (12500 steps performed) 500/500 [==============================] - 18s 36ms/step - reward: -0.1658 Interval 27 (13000 steps performed) 500/500 [==============================] 
- 20s 39ms/step - reward: -0.1676 Interval 28 (13500 steps performed) 500/500 [==============================] - 20s 40ms/step - reward: -0.1691 Interval 29 (14000 steps performed) 500/500 [==============================] - 18s 36ms/step - reward: -0.1861 Interval 30 (14500 steps performed) 500/500 [==============================] - 18s 37ms/step - reward: -0.1838 Interval 31 (15000 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.6659 2 episodes - episode_reward: -945.212 [-1789.855, -100.569] - loss: 3.446 - mae: 19.401 - mean_q: 14.523 Interval 32 (15500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1517 Interval 33 (16000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1580 Interval 34 (16500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1724 Interval 35 (17000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2195 Interval 36 (17500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1296 Interval 37 (18000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1870 Interval 38 (18500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1822 Interval 39 (19000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1968 Interval 40 (19500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2047 Interval 41 (20000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1782 Interval 42 (20500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1583 Interval 43 (21000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.1827 Interval 44 (21500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: 
-0.1813 Interval 45 (22000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2945 1 episodes - episode_reward: -1195.587 [-1195.587, -1195.587] - loss: 3.036 - mae: 17.996 - mean_q: 13.217 Interval 46 (22500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1187 Interval 47 (23000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1729 Interval 48 (23500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1950 Interval 49 (24000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1537 Interval 50 (24500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1758 Interval 51 (25000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.2066 Interval 52 (25500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1682 Interval 53 (26000 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1964 Interval 54 (26500 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1607 Interval 55 (27000 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.1686 Interval 56 (27500 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.2060 Interval 57 (28000 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.1652 Interval 58 (28500 steps performed) 500/500 [==============================] - 11s 23ms/step - reward: -0.1971 Interval 59 (29000 steps performed) 500/500 [==============================] - 11s 22ms/step - reward: -0.1889 Interval 60 (29500 steps performed) 500/500 [==============================] - 12s 23ms/step - reward: -0.1785 Interval 61 (30000 steps performed) 500/500 [==============================] - 12s 24ms/step - reward: -0.1777 Interval 
62 (30500 steps performed) 500/500 [==============================] - 12s 24ms/step - reward: -0.2225 Interval 63 (31000 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -0.1594 Interval 64 (31500 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.1767 Interval 65 (32000 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.1648 Interval 66 (32500 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.2060 Interval 67 (33000 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -0.1702 Interval 68 (33500 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -0.1477 Interval 69 (34000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.1429 Interval 70 (34500 steps performed) 500/500 [==============================] - 16s 31ms/step - reward: -0.1781 Interval 71 (35000 steps performed) 500/500 [==============================] - 16s 33ms/step - reward: -0.2082 Interval 72 (35500 steps performed) 500/500 [==============================] - 17s 34ms/step - reward: -0.1815 Interval 73 (36000 steps performed) 500/500 [==============================] - 18s 35ms/step - reward: -0.1519 Interval 74 (36500 steps performed) 500/500 [==============================] - 18s 36ms/step - reward: -0.1664 Interval 75 (37000 steps performed) 500/500 [==============================] - 18s 37ms/step - reward: -0.1703 Interval 76 (37500 steps performed) 500/500 [==============================] - 19s 38ms/step - reward: -0.1768 Interval 77 (38000 steps performed) 500/500 [==============================] - 19s 39ms/step - reward: -0.1702 Interval 78 (38500 steps performed) 500/500 [==============================] - 20s 40ms/step - reward: -0.1918 Interval 79 (39000 steps performed) 500/500 [==============================] - 20s 40ms/step - reward: -0.2207 Interval 
80 (39500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.4346 1 episodes - episode_reward: -3168.031 [-3168.031, -3168.031] - loss: 1.825 - mae: 11.675 - mean_q: 8.202 Interval 81 (40000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0105 Interval 82 (40500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1583 Interval 83 (41000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1474 Interval 84 (41500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1617 Interval 85 (42000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2340 Interval 86 (42500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1316 Interval 87 (43000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1542 Interval 88 (43500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.5764 Interval 89 (44000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2862 1 episodes - episode_reward: -1008.549 [-1008.549, -1008.549] - loss: 1.402 - mae: 11.126 - mean_q: 8.122 Interval 90 (44500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1107 Interval 91 (45000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2092 Interval 92 (45500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1693 Interval 93 (46000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1713 Interval 94 (46500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1048 Interval 95 (47000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1810 Interval 96 (47500 steps performed) 500/500 
[==============================] - 5s 9ms/step - reward: -0.2106 Interval 97 (48000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1079 Interval 98 (48500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1639 Interval 99 (49000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1803 Interval 100 (49500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1803 Interval 101 (50000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1935 Interval 102 (50500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.2036 Interval 103 (51000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1521 Interval 104 (51500 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.1837 Interval 105 (52000 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.1883 Interval 106 (52500 steps performed) 500/500 [==============================] - 9s 17ms/step - reward: -0.1728 Interval 107 (53000 steps performed) 500/500 [==============================] - 9s 17ms/step - reward: -0.1926 Interval 108 (53500 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.1837 Interval 109 (54000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.2216 Interval 110 (54500 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1497 Interval 111 (55000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.1618 Interval 112 (55500 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.2127 Interval 113 (56000 steps performed) 500/500 [==============================] - 11s 21ms/step - reward: -0.1797 Interval 114 (56500 steps performed) 500/500 
[==============================] - 10s 20ms/step - reward: -0.1735 Interval 115 (57000 steps performed) 500/500 [==============================] - 10s 21ms/step - reward: -0.2688 Interval 116 (57500 steps performed) 500/500 [==============================] - 11s 22ms/step - reward: -0.0791 Interval 117 (58000 steps performed) 500/500 [==============================] - 11s 23ms/step - reward: -0.1483 Interval 118 (58500 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -0.1792 Interval 119 (59000 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -0.1869 Interval 120 (59500 steps performed) 500/500 [==============================] - 13s 26ms/step - reward: -0.1759 Interval 121 (60000 steps performed) 500/500 [==============================] - 13s 27ms/step - reward: -0.1891 Interval 122 (60500 steps performed) 500/500 [==============================] - 13s 27ms/step - reward: -0.2144 Interval 123 (61000 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.1597 Interval 124 (61500 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.1746 Interval 125 (62000 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -0.2115 Interval 126 (62500 steps performed) 500/500 [==============================] - 14s 29ms/step - reward: -0.2010 Interval 127 (63000 steps performed) 500/500 [==============================] - 15s 30ms/step - reward: -0.2271 Interval 128 (63500 steps performed) 500/500 [==============================] - 14s 29ms/step - reward: -0.1543 Interval 129 (64000 steps performed) 500/500 [==============================] - 14s 29ms/step - reward: -0.1564 Interval 130 (64500 steps performed) 500/500 [==============================] - 14s 29ms/step - reward: -0.1680 Interval 131 (65000 steps performed) 500/500 [==============================] - 15s 29ms/step - reward: -0.1688 Interval 132 (65500 steps 
performed) 500/500 [==============================] - 16s 32ms/step - reward: -0.1740 Interval 133 (66000 steps performed) 500/500 [==============================] - 17s 33ms/step - reward: -0.1726 Interval 134 (66500 steps performed) 500/500 [==============================] - 18s 35ms/step - reward: -0.1925 Interval 135 (67000 steps performed) 500/500 [==============================] - 18s 35ms/step - reward: -0.1672 Interval 136 (67500 steps performed) 500/500 [==============================] - 18s 36ms/step - reward: -0.1853 Interval 137 (68000 steps performed) 500/500 [==============================] - 18s 37ms/step - reward: -0.1606 Interval 138 (68500 steps performed) 500/500 [==============================] - 18s 37ms/step - reward: -0.1772 Interval 139 (69000 steps performed) 500/500 [==============================] - 19s 37ms/step - reward: -0.1805 Interval 140 (69500 steps performed) 500/500 [==============================] - 19s 37ms/step - reward: -0.2049 Interval 141 (70000 steps performed) 500/500 [==============================] - 19s 39ms/step - reward: -0.1586 Interval 142 (70500 steps performed) 500/500 [==============================] - 20s 39ms/step - reward: -0.2061 Interval 143 (71000 steps performed) 500/500 [==============================] - 20s 40ms/step - reward: -0.1956 Interval 144 (71500 steps performed) 500/500 [==============================] - 20s 41ms/step - reward: -0.1909 Interval 145 (72000 steps performed) 500/500 [==============================] - 20s 40ms/step - reward: -0.1564 Interval 146 (72500 steps performed) 500/500 [==============================] - 21s 43ms/step - reward: -0.2064 Interval 147 (73000 steps performed) 500/500 [==============================] - 19s 39ms/step - reward: 0.0894 1 episodes - episode_reward: -5000.546 [-5000.546, -5000.546] - loss: 0.787 - mae: 6.119 - mean_q: -1.148 Interval 148 (73500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3487 5 episodes - 
episode_reward: -129.844 [-255.197, 14.481] - loss: 1.206 - mae: 6.347 - mean_q: -0.824 Interval 149 (74000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5432 2 episodes - episode_reward: -399.519 [-438.510, -360.528] - loss: 3.038 - mae: 6.892 - mean_q: -0.180 Interval 150 (74500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0245 Interval 151 (75000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7651 5 episodes - episode_reward: -73.662 [-329.779, 114.137] - loss: 1.954 - mae: 6.724 - mean_q: 0.429 Interval 152 (75500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5840 7 episodes - episode_reward: -180.600 [-387.221, -82.638] - loss: 3.656 - mae: 7.109 - mean_q: 1.390 Interval 153 (76000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0432 2 episodes - episode_reward: -298.050 [-436.481, -159.618] - loss: 4.926 - mae: 7.172 - mean_q: 1.603 Interval 154 (76500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0648 2 episodes - episode_reward: -38.392 [-56.542, -20.242] - loss: 2.339 - mae: 7.158 - mean_q: 1.744 Interval 155 (77000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1144 Interval 156 (77500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3691 1 episodes - episode_reward: -156.154 [-156.154, -156.154] - loss: 3.564 - mae: 7.322 - mean_q: 2.317 Interval 157 (78000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2645 1 episodes - episode_reward: -133.399 [-133.399, -133.399] - loss: 6.006 - mae: 7.243 - mean_q: 2.732 Interval 158 (78500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4160 2 episodes - episode_reward: -114.896 [-119.459, -110.332] - loss: 2.155 - mae: 7.333 - mean_q: 3.738 
Interval 159 (79000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4974 3 episodes - episode_reward: -76.488 [-120.076, -17.794] - loss: 3.105 - mae: 7.726 - mean_q: 4.817 Interval 160 (79500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0184 1 episodes - episode_reward: 20.928 [20.928, 20.928] - loss: 5.340 - mae: 7.939 - mean_q: 5.430 Interval 161 (80000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0904 Interval 162 (80500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0725 Interval 163 (81000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2864 1 episodes - episode_reward: 19.421 [19.421, 19.421] - loss: 3.126 - mae: 8.406 - mean_q: 7.010 Interval 164 (81500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0138 Interval 165 (82000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1572 1 episodes - episode_reward: 98.237 [98.237, 98.237] - loss: 4.431 - mae: 9.070 - mean_q: 8.177 Interval 166 (82500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8930 1 episodes - episode_reward: -386.593 [-386.593, -386.593] - loss: 2.625 - mae: 9.314 - mean_q: 8.488 Interval 167 (83000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2198 1 episodes - episode_reward: -162.639 [-162.639, -162.639] - loss: 2.996 - mae: 9.372 - mean_q: 8.980 Interval 168 (83500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6620 1 episodes - episode_reward: -336.521 [-336.521, -336.521] - loss: 2.485 - mae: 9.842 - mean_q: 9.674 Interval 169 (84000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0239 Interval 170 (84500 steps performed) 500/500 [==============================] - 3s 7ms/step - 
reward: -0.3811 2 episodes - episode_reward: -85.627 [-297.053, 125.798] - loss: 2.613 - mae: 10.292 - mean_q: 10.612 Interval 171 (85000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0630 Interval 172 (85500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6043 3 episodes - episode_reward: -115.985 [-155.280, -77.603] - loss: 2.806 - mae: 10.852 - mean_q: 11.575 Interval 173 (86000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0372 4 episodes - episode_reward: -135.503 [-314.919, 17.525] - loss: 4.587 - mae: 11.387 - mean_q: 12.810 Interval 174 (86500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0388 Interval 175 (87000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1521 Interval 176 (87500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0169 1 episodes - episode_reward: 50.935 [50.935, 50.935] - loss: 2.547 - mae: 11.964 - mean_q: 14.037 Interval 177 (88000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3227 Interval 178 (88500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1027 2 episodes - episode_reward: 31.993 [-100.000, 163.986] - loss: 4.572 - mae: 12.499 - mean_q: 14.796 Interval 179 (89000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2198 Interval 180 (89500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0749 Interval 181 (90000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1846 Interval 182 (90500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.3547 Interval 183 (91000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.0322 Interval 184 (91500 steps performed) 500/500 
[==============================] - 7s 13ms/step - reward: -0.1084 Interval 185 (92000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1167 Interval 186 (92500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: 0.0295 Interval 187 (93000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4223 2 episodes - episode_reward: -378.926 [-500.377, -257.475] - loss: 4.583 - mae: 13.780 - mean_q: 16.842 Interval 188 (93500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0355 Interval 189 (94000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1021 Interval 190 (94500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2603 Interval 191 (95000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1698 Interval 192 (95500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2327 Interval 193 (96000 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.3336 Interval 194 (96500 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.1430 Interval 195 (97000 steps performed) 500/500 [==============================] - 9s 19ms/step - reward: -0.2990 Interval 196 (97500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3964 1 episodes - episode_reward: -921.525 [-921.525, -921.525] - loss: 3.784 - mae: 16.976 - mean_q: 20.801 Interval 197 (98000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0759 Interval 198 (98500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2283 Interval 199 (99000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0340 Interval 200 (99500 steps performed) 500/500 
[==============================] - 5s 10ms/step - reward: -0.1277 Interval 201 (100000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0442 2 episodes - episode_reward: -146.102 [-191.982, -100.221] - loss: 3.303 - mae: 18.086 - mean_q: 22.267 Interval 202 (100500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1499 Interval 203 (101000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0506 Interval 204 (101500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0461 Interval 205 (102000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2066 3 episodes - episode_reward: -18.886 [-39.811, -1.690] - loss: 5.491 - mae: 20.129 - mean_q: 25.107 Interval 206 (102500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0411 2 episodes - episode_reward: 4.608 [-210.334, 219.551] - loss: 4.225 - mae: 20.853 - mean_q: 25.899 Interval 207 (103000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3098 1 episodes - episode_reward: -160.571 [-160.571, -160.571] - loss: 5.302 - mae: 21.389 - mean_q: 26.661 Interval 208 (103500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0891 Interval 209 (104000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8464 1 episodes - episode_reward: -393.140 [-393.140, -393.140] - loss: 4.817 - mae: 22.904 - mean_q: 29.060 Interval 210 (104500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1402 1 episodes - episode_reward: -185.707 [-185.707, -185.707] - loss: 5.237 - mae: 23.732 - mean_q: 29.661 Interval 211 (105000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2162 2 episodes - episode_reward: -63.031 [-100.907, -25.155] - loss: 7.361 - mae: 24.153 - mean_q: 
30.195 Interval 212 (105500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3436 Interval 213 (106000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2006 1 episodes - episode_reward: -181.627 [-181.627, -181.627] - loss: 3.878 - mae: 24.816 - mean_q: 31.447 Interval 214 (106500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4622 2 episodes - episode_reward: -168.416 [-228.417, -108.416] - loss: 6.327 - mae: 25.166 - mean_q: 31.844 Interval 215 (107000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0142 Interval 216 (107500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2198 1 episodes - episode_reward: 101.762 [101.762, 101.762] - loss: 6.353 - mae: 25.382 - mean_q: 31.910 Interval 217 (108000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0406 Interval 218 (108500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0538 2 episodes - episode_reward: 53.294 [-100.000, 206.588] - loss: 5.048 - mae: 26.191 - mean_q: 32.952 Interval 219 (109000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2273 2 episodes - episode_reward: 40.018 [-100.000, 180.036] - loss: 4.212 - mae: 26.580 - mean_q: 33.609 Interval 220 (109500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2078 Interval 221 (110000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0104 2 episodes - episode_reward: 51.479 [-120.118, 223.077] - loss: 6.146 - mae: 27.291 - mean_q: 34.284 Interval 222 (110500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3190 1 episodes - episode_reward: -235.569 [-235.569, -235.569] - loss: 8.856 - mae: 27.342 - mean_q: 34.459 Interval 223 (111000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: 0.0488 Interval 224 (111500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4277 1 episodes - episode_reward: 214.860 [214.860, 214.860] - loss: 4.887 - mae: 27.871 - mean_q: 35.149 Interval 225 (112000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1962 1 episodes - episode_reward: -76.681 [-76.681, -76.681] - loss: 9.763 - mae: 27.785 - mean_q: 34.983 Interval 226 (112500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4736 1 episodes - episode_reward: 271.095 [271.095, 271.095] - loss: 5.820 - mae: 27.971 - mean_q: 35.364 Interval 227 (113000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1404 Interval 228 (113500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5725 1 episodes - episode_reward: 173.606 [173.606, 173.606] - loss: 4.455 - mae: 27.988 - mean_q: 35.380 Interval 229 (114000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2515 1 episodes - episode_reward: 262.185 [262.185, 262.185] - loss: 5.006 - mae: 28.381 - mean_q: 36.366 Interval 230 (114500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3364 1 episodes - episode_reward: -234.318 [-234.318, -234.318] - loss: 6.385 - mae: 28.842 - mean_q: 36.805 Interval 231 (115000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1219 3 episodes - episode_reward: 30.137 [-90.612, 181.272] - loss: 4.205 - mae: 28.437 - mean_q: 36.244 Interval 232 (115500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0161 Interval 233 (116000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0335 Interval 234 (116500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3514 1 episodes - 
episode_reward: 91.146 [91.146, 91.146] - loss: 8.956 - mae: 28.356 - mean_q: 36.142 Interval 235 (117000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0402 2 episodes - episode_reward: 55.085 [-84.681, 194.851] - loss: 9.166 - mae: 28.652 - mean_q: 36.519 Interval 236 (117500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3408 3 episodes - episode_reward: -61.703 [-127.184, -25.605] - loss: 5.010 - mae: 28.507 - mean_q: 36.333 Interval 237 (118000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4145 1 episodes - episode_reward: 228.966 [228.966, 228.966] - loss: 9.084 - mae: 28.773 - mean_q: 36.289 Interval 238 (118500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0069 1 episodes - episode_reward: -94.942 [-94.942, -94.942] - loss: 7.382 - mae: 28.788 - mean_q: 36.429 Interval 239 (119000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1117 Interval 240 (119500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5361 1 episodes - episode_reward: 227.168 [227.168, 227.168] - loss: 6.636 - mae: 28.765 - mean_q: 36.488 Interval 241 (120000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3497 1 episodes - episode_reward: 268.180 [268.180, 268.180] - loss: 6.387 - mae: 28.880 - mean_q: 36.597 Interval 242 (120500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3175 1 episodes - episode_reward: 207.638 [207.638, 207.638] - loss: 7.399 - mae: 28.642 - mean_q: 36.364 Interval 243 (121000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0343 Interval 244 (121500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4960 1 episodes - episode_reward: 142.905 [142.905, 142.905] - loss: 3.560 - mae: 28.785 - mean_q: 36.814 
Interval 245 (122000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2978 1 episodes - episode_reward: 243.842 [243.842, 243.842] - loss: 8.437 - mae: 28.524 - mean_q: 36.115 Interval 246 (122500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2356 Interval 247 (123000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0132 1 episodes - episode_reward: -163.947 [-163.947, -163.947] - loss: 5.972 - mae: 28.956 - mean_q: 36.720 Interval 248 (123500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2777 1 episodes - episode_reward: 196.476 [196.476, 196.476] - loss: 5.822 - mae: 29.578 - mean_q: 37.407 Interval 249 (124000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1851 Interval 250 (124500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1878 Interval 251 (125000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1820 Interval 252 (125500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2491 1 episodes - episode_reward: -124.356 [-124.356, -124.356] - loss: 6.656 - mae: 29.899 - mean_q: 38.266 Interval 253 (126000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6563 2 episodes - episode_reward: -151.292 [-175.131, -127.453] - loss: 5.496 - mae: 30.036 - mean_q: 38.493 Interval 254 (126500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3150 1 episodes - episode_reward: 133.856 [133.856, 133.856] - loss: 6.108 - mae: 29.752 - mean_q: 37.909 Interval 255 (127000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1717 Interval 256 (127500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1470 2 episodes - episode_reward: 36.766 [-136.398, 209.930] - 
loss: 6.530 - mae: 29.534 - mean_q: 37.790 Interval 257 (128000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2926 1 episodes - episode_reward: 186.741 [186.741, 186.741] - loss: 4.747 - mae: 29.357 - mean_q: 37.347 Interval 258 (128500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0404 3 episodes - episode_reward: 22.366 [-144.980, 159.897] - loss: 6.983 - mae: 29.737 - mean_q: 38.177 Interval 259 (129000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1893 1 episodes - episode_reward: -108.026 [-108.026, -108.026] - loss: 6.260 - mae: 29.493 - mean_q: 37.703 Interval 260 (129500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 5.0463e-04 2 episodes - episode_reward: -66.228 [-150.538, 18.082] - loss: 5.039 - mae: 29.405 - mean_q: 37.779 Interval 261 (130000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4000 1 episodes - episode_reward: 260.314 [260.314, 260.314] - loss: 4.976 - mae: 29.600 - mean_q: 37.637 Interval 262 (130500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3363 1 episodes - episode_reward: 228.754 [228.754, 228.754] - loss: 5.360 - mae: 30.150 - mean_q: 38.183 Interval 263 (131000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0691 Interval 264 (131500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0305 2 episodes - episode_reward: -1.339 [-111.400, 108.722] - loss: 4.592 - mae: 29.964 - mean_q: 37.978 Interval 265 (132000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2341 Interval 266 (132500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1512 1 episodes - episode_reward: 228.022 [228.022, 228.022] - loss: 5.930 - mae: 30.010 - mean_q: 38.369 Interval 267 (133000 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: 0.1987 1 episodes - episode_reward: 152.244 [152.244, 152.244] - loss: 3.968 - mae: 30.154 - mean_q: 38.616 Interval 268 (133500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4223 Interval 269 (134000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3315 2 episodes - episode_reward: 62.550 [-109.471, 234.570] - loss: 6.147 - mae: 30.827 - mean_q: 39.347 Interval 270 (134500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3965 1 episodes - episode_reward: 261.743 [261.743, 261.743] - loss: 4.678 - mae: 31.260 - mean_q: 39.965 Interval 271 (135000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4093 1 episodes - episode_reward: 242.352 [242.352, 242.352] - loss: 4.375 - mae: 31.487 - mean_q: 40.385 Interval 272 (135500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2303 2 episodes - episode_reward: 71.565 [-72.861, 215.992] - loss: 4.950 - mae: 31.755 - mean_q: 40.638 Interval 273 (136000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1968 2 episodes - episode_reward: 73.037 [-44.311, 190.385] - loss: 4.125 - mae: 32.082 - mean_q: 41.478 Interval 274 (136500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2030 Interval 275 (137000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2080 Interval 276 (137500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5097 1 episodes - episode_reward: 135.840 [135.840, 135.840] - loss: 4.720 - mae: 33.142 - mean_q: 43.293 Interval 277 (138000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6167 1 episodes - episode_reward: 278.813 [278.813, 278.813] - loss: 5.798 - mae: 33.224 - mean_q: 43.513 Interval 278 (138500 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5083 3 episodes - episode_reward: 123.365 [-96.516, 240.880] - loss: 5.682 - mae: 33.340 - mean_q: 43.676 Interval 279 (139000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0118 1 episodes - episode_reward: -96.737 [-96.737, -96.737] - loss: 5.342 - mae: 33.904 - mean_q: 44.573 Interval 280 (139500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3642 4 episodes - episode_reward: -12.203 [-134.580, 226.671] - loss: 4.107 - mae: 34.190 - mean_q: 44.732 Interval 281 (140000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8274 3 episodes - episode_reward: -129.863 [-271.751, -52.966] - loss: 4.306 - mae: 34.657 - mean_q: 45.517 Interval 282 (140500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4323 1 episodes - episode_reward: 196.258 [196.258, 196.258] - loss: 5.311 - mae: 35.320 - mean_q: 46.327 Interval 283 (141000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3222 3 episodes - episode_reward: -95.806 [-116.770, -84.464] - loss: 8.224 - mae: 35.399 - mean_q: 46.188 Interval 284 (141500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3969 3 episodes - episode_reward: -34.540 [-91.961, 21.835] - loss: 5.643 - mae: 36.269 - mean_q: 47.141 Interval 285 (142000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4874 4 episodes - episode_reward: -57.168 [-136.091, -6.557] - loss: 7.322 - mae: 36.820 - mean_q: 47.738 Interval 286 (142500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1907 Interval 287 (143000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5732 2 episodes - episode_reward: 213.305 [206.769, 219.840] - loss: 7.008 - mae: 37.204 - mean_q: 48.177 
Interval 288 (143500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3297 2 episodes - episode_reward: 81.153 [-55.288, 217.595] - loss: 7.517 - mae: 37.468 - mean_q: 48.533 Interval 289 (144000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1425 3 episodes - episode_reward: -24.900 [-67.508, 0.079] - loss: 7.253 - mae: 37.924 - mean_q: 49.299 Interval 290 (144500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2826 1 episodes - episode_reward: 249.781 [249.781, 249.781] - loss: 7.062 - mae: 38.053 - mean_q: 49.252 Interval 291 (145000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1790 Interval 292 (145500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4226 1 episodes - episode_reward: -300.075 [-300.075, -300.075] - loss: 5.819 - mae: 38.534 - mean_q: 50.316 Interval 293 (146000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2935 2 episodes - episode_reward: 48.243 [-100.000, 196.487] - loss: 7.602 - mae: 38.686 - mean_q: 50.155 Interval 294 (146500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3872 1 episodes - episode_reward: -125.058 [-125.058, -125.058] - loss: 5.289 - mae: 38.592 - mean_q: 50.436 Interval 295 (147000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1351 Interval 296 (147500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4907 1 episodes - episode_reward: 165.056 [165.056, 165.056] - loss: 5.077 - mae: 38.339 - mean_q: 49.737 Interval 297 (148000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0490 Interval 298 (148500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2269 1 episodes - episode_reward: 196.176 [196.176, 196.176] - loss: 6.305 - 
mae: 38.497 - mean_q: 50.277 Interval 299 (149000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2202 1 episodes - episode_reward: 132.719 [132.719, 132.719] - loss: 7.243 - mae: 38.444 - mean_q: 50.223 Interval 300 (149500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8517 3 episodes - episode_reward: -154.958 [-288.816, -77.058] - loss: 8.375 - mae: 38.562 - mean_q: 50.568 Interval 301 (150000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0588 Interval 302 (150500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3602 1 episodes - episode_reward: 155.485 [155.485, 155.485] - loss: 6.221 - mae: 38.505 - mean_q: 50.224 Interval 303 (151000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4095 1 episodes - episode_reward: 220.899 [220.899, 220.899] - loss: 6.326 - mae: 38.643 - mean_q: 50.503 Interval 304 (151500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3431 1 episodes - episode_reward: 212.414 [212.414, 212.414] - loss: 6.268 - mae: 38.678 - mean_q: 50.449 Interval 305 (152000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2105 Interval 306 (152500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1256 Interval 307 (153000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4823 1 episodes - episode_reward: 171.120 [171.120, 171.120] - loss: 6.878 - mae: 38.500 - mean_q: 50.366 Interval 308 (153500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0860 Interval 309 (154000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2020 1 episodes - episode_reward: 156.957 [156.957, 156.957] - loss: 6.077 - mae: 38.603 - mean_q: 50.688 Interval 310 (154500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.0868 3 episodes - episode_reward: -0.619 [-102.281, 196.915] - loss: 5.395 - mae: 38.333 - mean_q: 50.300 Interval 311 (155000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1915 2 episodes - episode_reward: 52.294 [-122.963, 227.552] - loss: 6.334 - mae: 38.849 - mean_q: 50.778 Interval 312 (155500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5915 1 episodes - episode_reward: 200.949 [200.949, 200.949] - loss: 5.397 - mae: 38.794 - mean_q: 50.782 Interval 313 (156000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4978 1 episodes - episode_reward: 230.666 [230.666, 230.666] - loss: 4.593 - mae: 39.063 - mean_q: 51.137 Interval 314 (156500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5214 1 episodes - episode_reward: 281.256 [281.256, 281.256] - loss: 5.947 - mae: 39.144 - mean_q: 51.301 Interval 315 (157000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5689 1 episodes - episode_reward: 250.226 [250.226, 250.226] - loss: 5.708 - mae: 39.319 - mean_q: 51.539 Interval 316 (157500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5415 2 episodes - episode_reward: 144.473 [-2.177, 291.123] - loss: 8.261 - mae: 39.242 - mean_q: 51.602 Interval 317 (158000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0649 Interval 318 (158500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4403 1 episodes - episode_reward: 167.189 [167.189, 167.189] - loss: 5.009 - mae: 39.197 - mean_q: 51.266 Interval 319 (159000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5834 2 episodes - episode_reward: 208.050 [195.258, 220.842] - loss: 6.425 - mae: 39.301 - mean_q: 51.371 Interval 320 (159500 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8004 1 episodes - episode_reward: 261.204 [261.204, 261.204] - loss: 6.188 - mae: 39.472 - mean_q: 51.521 Interval 321 (160000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8623 2 episodes - episode_reward: 258.346 [217.486, 299.205] - loss: 5.021 - mae: 39.514 - mean_q: 51.668 Interval 322 (160500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3783 1 episodes - episode_reward: 285.495 [285.495, 285.495] - loss: 4.696 - mae: 39.659 - mean_q: 51.605 Interval 323 (161000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4110 Interval 324 (161500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3926 1 episodes - episode_reward: 258.019 [258.019, 258.019] - loss: 4.511 - mae: 39.753 - mean_q: 52.065 Interval 325 (162000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 1.0293 2 episodes - episode_reward: 231.387 [209.750, 253.025] - loss: 5.224 - mae: 39.808 - mean_q: 51.928 Interval 326 (162500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0687 Interval 327 (163000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0126 Interval 328 (163500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6145 2 episodes - episode_reward: 216.904 [215.872, 217.936] - loss: 7.927 - mae: 39.574 - mean_q: 51.782 Interval 329 (164000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4233 1 episodes - episode_reward: 210.530 [210.530, 210.530] - loss: 4.697 - mae: 39.565 - mean_q: 51.551 Interval 330 (164500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1840 Interval 331 (165000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 
0.1468 2 episodes - episode_reward: 67.382 [-52.286, 187.049] - loss: 6.648 - mae: 39.955 - mean_q: 52.411 Interval 332 (165500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3502 Interval 333 (166000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3477 1 episodes - episode_reward: 300.133 [300.133, 300.133] - loss: 6.504 - mae: 40.278 - mean_q: 52.761 Interval 334 (166500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8304 2 episodes - episode_reward: 211.228 [183.406, 239.051] - loss: 6.829 - mae: 40.407 - mean_q: 53.143 Interval 335 (167000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5786 1 episodes - episode_reward: 267.617 [267.617, 267.617] - loss: 5.274 - mae: 40.748 - mean_q: 53.420 Interval 336 (167500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3572 2 episodes - episode_reward: 67.190 [-53.500, 187.880] - loss: 5.432 - mae: 40.403 - mean_q: 53.133 Interval 337 (168000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2827 2 episodes - episode_reward: 69.130 [-94.828, 233.088] - loss: 4.859 - mae: 40.568 - mean_q: 53.337 Interval 338 (168500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3287 1 episodes - episode_reward: 250.515 [250.515, 250.515] - loss: 6.916 - mae: 40.705 - mean_q: 53.550 Interval 339 (169000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2365 1 episodes - episode_reward: 149.346 [149.346, 149.346] - loss: 6.539 - mae: 40.908 - mean_q: 53.643 Interval 340 (169500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5785 2 episodes - episode_reward: 176.600 [162.175, 191.025] - loss: 5.779 - mae: 41.119 - mean_q: 53.896 Interval 341 (170000 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: 0.0342 Interval 342 (170500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3813 1 episodes - episode_reward: 98.501 [98.501, 98.501] - loss: 5.461 - mae: 41.160 - mean_q: 54.199 Interval 343 (171000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6425 1 episodes - episode_reward: 267.362 [267.362, 267.362] - loss: 6.544 - mae: 41.158 - mean_q: 54.275 Interval 344 (171500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6315 2 episodes - episode_reward: 224.534 [181.093, 267.976] - loss: 4.897 - mae: 41.515 - mean_q: 54.430 Interval 345 (172000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0683 Interval 346 (172500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3123 1 episodes - episode_reward: 119.674 [119.674, 119.674] - loss: 6.933 - mae: 40.996 - mean_q: 53.731 Interval 347 (173000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7433 2 episodes - episode_reward: 215.284 [186.310, 244.258] - loss: 5.905 - mae: 41.211 - mean_q: 54.108 Interval 348 (173500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5723 2 episodes - episode_reward: -148.543 [-183.807, -113.279] - loss: 5.981 - mae: 41.008 - mean_q: 53.643 Interval 349 (174000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3329 Interval 350 (174500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6531 3 episodes - episode_reward: 136.475 [-106.112, 274.032] - loss: 6.986 - mae: 41.761 - mean_q: 54.728 Interval 351 (175000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5884 1 episodes - episode_reward: 297.048 [297.048, 297.048] - loss: 8.254 - mae: 41.710 - mean_q: 54.608 Interval 352 (175500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.3415 1 episodes - episode_reward: 232.858 [232.858, 232.858] - loss: 5.892 - mae: 41.491 - mean_q: 54.185 Interval 353 (176000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6799 1 episodes - episode_reward: 234.666 [234.666, 234.666] - loss: 5.313 - mae: 41.603 - mean_q: 54.511 Interval 354 (176500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1530 1 episodes - episode_reward: 210.732 [210.732, 210.732] - loss: 7.929 - mae: 41.902 - mean_q: 54.850 Interval 355 (177000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0240 2 episodes - episode_reward: -61.162 [-106.186, -16.139] - loss: 6.048 - mae: 41.745 - mean_q: 54.818 Interval 356 (177500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4509 1 episodes - episode_reward: 243.187 [243.187, 243.187] - loss: 7.400 - mae: 41.735 - mean_q: 54.714 Interval 357 (178000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1185 1 episodes - episode_reward: 217.061 [217.061, 217.061] - loss: 8.908 - mae: 41.401 - mean_q: 54.226 Interval 358 (178500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2734 2 episodes - episode_reward: 45.322 [-108.445, 199.090] - loss: 5.929 - mae: 41.422 - mean_q: 54.413 Interval 359 (179000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5413 1 episodes - episode_reward: 195.556 [195.556, 195.556] - loss: 6.630 - mae: 41.528 - mean_q: 54.577 Interval 360 (179500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6715 3 episodes - episode_reward: 130.678 [14.964, 193.893] - loss: 6.021 - mae: 41.495 - mean_q: 54.783 Interval 361 (180000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6248 1 episodes - episode_reward: 302.395 
[302.395, 302.395] - loss: 4.733 - mae: 41.975 - mean_q: 55.316 Interval 362 (180500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5069 1 episodes - episode_reward: 183.745 [183.745, 183.745] - loss: 8.393 - mae: 42.139 - mean_q: 55.604 Interval 363 (181000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5261 1 episodes - episode_reward: 247.973 [247.973, 247.973] - loss: 6.812 - mae: 42.574 - mean_q: 56.026 Interval 364 (181500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7621 2 episodes - episode_reward: 239.520 [217.932, 261.107] - loss: 6.806 - mae: 42.893 - mean_q: 56.334 Interval 365 (182000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5886 2 episodes - episode_reward: 95.601 [-26.185, 217.386] - loss: 6.182 - mae: 42.717 - mean_q: 56.270 Interval 366 (182500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2086 1 episodes - episode_reward: 205.017 [205.017, 205.017] - loss: 6.261 - mae: 42.785 - mean_q: 56.233 Interval 367 (183000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1061 3 episodes - episode_reward: -0.916 [-100.000, 194.387] - loss: 7.509 - mae: 43.326 - mean_q: 57.085 Interval 368 (183500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3651 1 episodes - episode_reward: 288.866 [288.866, 288.866] - loss: 6.403 - mae: 42.979 - mean_q: 56.407 Interval 369 (184000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4414 2 episodes - episode_reward: 48.943 [-107.579, 205.466] - loss: 7.631 - mae: 42.458 - mean_q: 55.948 Interval 370 (184500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2894 2 episodes - episode_reward: 97.380 [-1.084, 195.844] - loss: 7.286 - mae: 42.809 - mean_q: 56.237 Interval 371 (185000 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: 0.3534 1 episodes - episode_reward: 248.977 [248.977, 248.977] - loss: 7.939 - mae: 42.777 - mean_q: 56.303 Interval 372 (185500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7941 1 episodes - episode_reward: 179.775 [179.775, 179.775] - loss: 7.989 - mae: 43.016 - mean_q: 56.453 Interval 373 (186000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4778 1 episodes - episode_reward: 305.705 [305.705, 305.705] - loss: 6.133 - mae: 43.009 - mean_q: 56.480 Interval 374 (186500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3032 1 episodes - episode_reward: 230.768 [230.768, 230.768] - loss: 4.404 - mae: 42.840 - mean_q: 56.410 Interval 375 (187000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2527 Interval 376 (187500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.4252 1 episodes - episode_reward: 87.469 [87.469, 87.469] - loss: 6.831 - mae: 42.589 - mean_q: 55.768 Interval 377 (188000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2733 Interval 378 (188500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1054 3 episodes - episode_reward: 38.036 [-108.172, 277.194] - loss: 5.840 - mae: 42.750 - mean_q: 55.884 Interval 379 (189000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0372 Interval 380 (189500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3186 1 episodes - episode_reward: 128.502 [128.502, 128.502] - loss: 5.933 - mae: 42.637 - mean_q: 56.053 Interval 381 (190000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5391 1 episodes - episode_reward: 172.706 [172.706, 172.706] - loss: 6.939 - mae: 42.304 - mean_q: 55.528 Interval 382 (190500 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5135 1 episodes - episode_reward: 267.593 [267.593, 267.593] - loss: 6.661 - mae: 42.634 - mean_q: 56.127 Interval 383 (191000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1512 1 episodes - episode_reward: 224.628 [224.628, 224.628] - loss: 5.556 - mae: 42.513 - mean_q: 56.165 Interval 384 (191500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6084 1 episodes - episode_reward: 247.740 [247.740, 247.740] - loss: 5.630 - mae: 42.617 - mean_q: 56.133 Interval 385 (192000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6778 2 episodes - episode_reward: 194.123 [171.730, 216.515] - loss: 6.257 - mae: 42.761 - mean_q: 56.727 Interval 386 (192500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1899 Interval 387 (193000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0402 Interval 388 (193500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0165 4 episodes - episode_reward: -15.999 [-151.999, 178.497] - loss: 6.886 - mae: 43.014 - mean_q: 56.735 Interval 389 (194000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4382 1 episodes - episode_reward: 235.428 [235.428, 235.428] - loss: 5.366 - mae: 43.136 - mean_q: 57.227 Interval 390 (194500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6307 2 episodes - episode_reward: 216.164 [200.993, 231.336] - loss: 8.511 - mae: 42.878 - mean_q: 56.680 Interval 391 (195000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4143 1 episodes - episode_reward: 244.786 [244.786, 244.786] - loss: 6.345 - mae: 43.600 - mean_q: 57.832 Interval 392 (195500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
-0.0432 2 episodes - episode_reward: -18.396 [-245.497, 208.706] - loss: 5.295 - mae: 43.430 - mean_q: 57.502 Interval 393 (196000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6230 1 episodes - episode_reward: 176.007 [176.007, 176.007] - loss: 7.474 - mae: 44.291 - mean_q: 58.639 Interval 394 (196500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3534 1 episodes - episode_reward: 189.696 [189.696, 189.696] - loss: 7.395 - mae: 44.442 - mean_q: 59.074 Interval 395 (197000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3295 2 episodes - episode_reward: 113.592 [-0.620, 227.803] - loss: 6.725 - mae: 45.130 - mean_q: 59.728 Interval 396 (197500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5672 1 episodes - episode_reward: 209.511 [209.511, 209.511] - loss: 7.575 - mae: 45.478 - mean_q: 60.151 Interval 397 (198000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0943 Interval 398 (198500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2362 1 episodes - episode_reward: 150.105 [150.105, 150.105] - loss: 5.714 - mae: 46.030 - mean_q: 60.770 Interval 399 (199000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4294 2 episodes - episode_reward: 100.198 [-71.197, 271.593] - loss: 6.471 - mae: 46.490 - mean_q: 61.653 Interval 400 (199500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3478 2 episodes - episode_reward: 111.797 [-29.949, 253.543] - loss: 6.939 - mae: 47.163 - mean_q: 62.747 Interval 401 (200000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4645 2 episodes - episode_reward: -115.858 [-206.315, -25.402] - loss: 5.503 - mae: 47.479 - mean_q: 63.010 Interval 402 (200500 steps performed) 500/500 [==============================] - 3s 
6ms/step - reward: -0.6163 3 episodes - episode_reward: -103.793 [-440.278, 257.391] - loss: 9.931 - mae: 47.998 - mean_q: 63.529 Interval 403 (201000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4335 1 episodes - episode_reward: 220.619 [220.619, 220.619] - loss: 8.987 - mae: 47.893 - mean_q: 63.265 Interval 404 (201500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3634 2 episodes - episode_reward: -10.933 [-24.400, 2.534] - loss: 12.051 - mae: 48.256 - mean_q: 64.008 Interval 405 (202000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8662 2 episodes - episode_reward: -306.731 [-327.885, -285.577] - loss: 7.939 - mae: 48.339 - mean_q: 64.000 Interval 406 (202500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5025 1 episodes - episode_reward: 240.841 [240.841, 240.841] - loss: 7.703 - mae: 48.424 - mean_q: 64.216 Interval 407 (203000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2739 1 episodes - episode_reward: -110.564 [-110.564, -110.564] - loss: 7.809 - mae: 48.538 - mean_q: 63.928 Interval 408 (203500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1471 3 episodes - episode_reward: -26.676 [-60.061, 15.649] - loss: 8.893 - mae: 48.709 - mean_q: 64.583 Interval 409 (204000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3689 3 episodes - episode_reward: 61.886 [-36.151, 257.149] - loss: 7.521 - mae: 48.562 - mean_q: 64.301 Interval 410 (204500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5054 4 episodes - episode_reward: -52.740 [-148.381, 43.187] - loss: 7.115 - mae: 48.475 - mean_q: 64.106 Interval 411 (205000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4923 2 episodes - episode_reward: 105.539 [-4.810, 215.889] - loss: 
9.944 - mae: 48.418 - mean_q: 63.906 Interval 412 (205500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3200 1 episodes - episode_reward: 162.554 [162.554, 162.554] - loss: 9.589 - mae: 47.919 - mean_q: 63.411 Interval 413 (206000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0694 1 episodes - episode_reward: -43.233 [-43.233, -43.233] - loss: 8.449 - mae: 48.049 - mean_q: 63.643 Interval 414 (206500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4189 1 episodes - episode_reward: 267.276 [267.276, 267.276] - loss: 6.932 - mae: 48.464 - mean_q: 64.113 Interval 415 (207000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3384 2 episodes - episode_reward: 114.817 [3.436, 226.198] - loss: 7.589 - mae: 48.445 - mean_q: 63.697 Interval 416 (207500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7537 1 episodes - episode_reward: 211.200 [211.200, 211.200] - loss: 8.644 - mae: 48.654 - mean_q: 64.276 Interval 417 (208000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3731 2 episodes - episode_reward: 119.670 [10.653, 228.686] - loss: 7.182 - mae: 48.635 - mean_q: 64.185 Interval 418 (208500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0219 Interval 419 (209000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2135 1 episodes - episode_reward: 178.170 [178.170, 178.170] - loss: 7.363 - mae: 49.007 - mean_q: 64.904 Interval 420 (209500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1174 Interval 421 (210000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1500 2 episodes - episode_reward: 65.894 [-46.337, 178.125] - loss: 8.735 - mae: 48.373 - mean_q: 63.743 Interval 422 (210500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.4483 1 episodes - episode_reward: 180.592 [180.592, 180.592] - loss: 6.851 - mae: 48.578 - mean_q: 64.310 Interval 423 (211000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1571 2 episodes - episode_reward: -55.833 [-100.000, -11.665] - loss: 8.307 - mae: 48.576 - mean_q: 64.292 Interval 424 (211500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0245 Interval 425 (212000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0244 Interval 426 (212500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0212 Interval 427 (213000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0479 Interval 428 (213500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2612 1 episodes - episode_reward: 137.910 [137.910, 137.910] - loss: 12.491 - mae: 48.554 - mean_q: 63.635 Interval 429 (214000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1495 Interval 430 (214500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4159 1 episodes - episode_reward: 196.600 [196.600, 196.600] - loss: 8.356 - mae: 47.882 - mean_q: 62.926 Interval 431 (215000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2697 1 episodes - episode_reward: 203.146 [203.146, 203.146] - loss: 8.865 - mae: 47.863 - mean_q: 62.838 Interval 432 (215500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1317 1 episodes - episode_reward: 124.772 [124.772, 124.772] - loss: 6.439 - mae: 47.590 - mean_q: 62.372 Interval 433 (216000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2672 Interval 434 (216500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4706 1 
episodes - episode_reward: 214.393 [214.393, 214.393] - loss: 8.162 - mae: 47.349 - mean_q: 62.248 Interval 435 (217000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0412 2 episodes - episode_reward: 65.027 [-100.000, 230.053] - loss: 7.468 - mae: 47.048 - mean_q: 61.888 Interval 436 (217500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5745 1 episodes - episode_reward: 211.185 [211.185, 211.185] - loss: 7.288 - mae: 46.848 - mean_q: 61.528 Interval 437 (218000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4427 1 episodes - episode_reward: 282.464 [282.464, 282.464] - loss: 7.333 - mae: 46.750 - mean_q: 61.354 Interval 438 (218500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1158 1 episodes - episode_reward: -183.904 [-183.904, -183.904] - loss: 6.594 - mae: 46.886 - mean_q: 61.670 Interval 439 (219000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1971 2 episodes - episode_reward: 133.259 [15.837, 250.681] - loss: 8.513 - mae: 46.516 - mean_q: 61.024 Interval 440 (219500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4057 2 episodes - episode_reward: 71.969 [-75.270, 219.208] - loss: 9.086 - mae: 46.630 - mean_q: 61.385 Interval 441 (220000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8344 1 episodes - episode_reward: 218.495 [218.495, 218.495] - loss: 6.875 - mae: 46.596 - mean_q: 61.227 Interval 442 (220500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5071 1 episodes - episode_reward: 314.636 [314.636, 314.636] - loss: 7.527 - mae: 46.872 - mean_q: 61.380 Interval 443 (221000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0417 Interval 444 (221500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: 0.4910 1 episodes - episode_reward: 217.872 [217.872, 217.872] - loss: 8.164 - mae: 46.563 - mean_q: 61.410 Interval 445 (222000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6585 1 episodes - episode_reward: 318.062 [318.062, 318.062] - loss: 7.023 - mae: 46.992 - mean_q: 61.878 Interval 446 (222500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3814 1 episodes - episode_reward: 273.044 [273.044, 273.044] - loss: 7.716 - mae: 46.852 - mean_q: 61.402 Interval 447 (223000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3226 3 episodes - episode_reward: 90.272 [-163.206, 242.172] - loss: 6.169 - mae: 47.013 - mean_q: 62.020 Interval 448 (223500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2673 Interval 449 (224000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8367 2 episodes - episode_reward: 224.092 [222.681, 225.503] - loss: 10.429 - mae: 46.896 - mean_q: 61.773 Interval 450 (224500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6798 1 episodes - episode_reward: 255.078 [255.078, 255.078] - loss: 7.128 - mae: 47.135 - mean_q: 61.830 Interval 451 (225000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7266 2 episodes - episode_reward: 253.361 [237.838, 268.884] - loss: 7.914 - mae: 47.249 - mean_q: 62.226 Interval 452 (225500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6913 1 episodes - episode_reward: 296.768 [296.768, 296.768] - loss: 7.726 - mae: 47.269 - mean_q: 62.185 Interval 453 (226000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5230 1 episodes - episode_reward: 234.797 [234.797, 234.797] - loss: 8.781 - mae: 47.355 - mean_q: 62.046 Interval 454 (226500 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: 0.3533 1 episodes - episode_reward: 199.621 [199.621, 199.621] - loss: 8.102 - mae: 47.299 - mean_q: 62.192 Interval 455 (227000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4569 1 episodes - episode_reward: 176.077 [176.077, 176.077] - loss: 7.136 - mae: 47.553 - mean_q: 62.491 Interval 456 (227500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7694 2 episodes - episode_reward: 233.095 [221.233, 244.957] - loss: 6.741 - mae: 47.243 - mean_q: 62.089 Interval 457 (228000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0338 3 episodes - episode_reward: 13.806 [-128.824, 270.243] - loss: 9.143 - mae: 47.866 - mean_q: 63.079 Interval 458 (228500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1879 3 episodes - episode_reward: 19.461 [-111.878, 258.818] - loss: 8.771 - mae: 48.109 - mean_q: 63.116 Interval 459 (229000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 1.0360 2 episodes - episode_reward: 282.974 [271.710, 294.238] - loss: 8.860 - mae: 47.887 - mean_q: 62.876 Interval 460 (229500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3577 1 episodes - episode_reward: 248.893 [248.893, 248.893] - loss: 6.890 - mae: 47.671 - mean_q: 62.623 Interval 461 (230000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5068 1 episodes - episode_reward: 189.022 [189.022, 189.022] - loss: 5.522 - mae: 47.236 - mean_q: 62.062 Interval 462 (230500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7428 1 episodes - episode_reward: 297.850 [297.850, 297.850] - loss: 7.923 - mae: 47.686 - mean_q: 62.596 Interval 463 (231000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8360 2 episodes - episode_reward: 244.118 [240.773, 247.463] - loss: 7.304 - 
mae: 47.872 - mean_q: 62.978 Interval 464 (231500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7242 1 episodes - episode_reward: 288.431 [288.431, 288.431] - loss: 8.506 - mae: 47.887 - mean_q: 62.897 Interval 465 (232000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5109 3 episodes - episode_reward: 107.750 [-119.774, 229.267] - loss: 5.792 - mae: 47.786 - mean_q: 62.826 Interval 466 (232500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2905 1 episodes - episode_reward: 212.609 [212.609, 212.609] - loss: 9.247 - mae: 47.359 - mean_q: 62.107 Interval 467 (233000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5094 1 episodes - episode_reward: 184.441 [184.441, 184.441] - loss: 8.478 - mae: 47.628 - mean_q: 62.581 Interval 468 (233500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3897 1 episodes - episode_reward: 256.872 [256.872, 256.872] - loss: 6.911 - mae: 47.213 - mean_q: 62.179 Interval 469 (234000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7666 1 episodes - episode_reward: 249.496 [249.496, 249.496] - loss: 6.325 - mae: 46.942 - mean_q: 61.732 Interval 470 (234500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3076 1 episodes - episode_reward: 228.645 [228.645, 228.645] - loss: 8.969 - mae: 47.023 - mean_q: 62.008 Interval 471 (235000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1317 Interval 472 (235500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3302 2 episodes - episode_reward: 38.996 [-104.020, 182.012] - loss: 7.297 - mae: 46.442 - mean_q: 61.430 Interval 473 (236000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5316 3 episodes - episode_reward: 124.103 [-109.068, 267.882] - 
loss: 7.682 - mae: 46.752 - mean_q: 61.439 Interval 474 (236500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1039 2 episodes - episode_reward: 49.462 [-68.449, 167.374] - loss: 9.942 - mae: 46.910 - mean_q: 61.792 Interval 475 (237000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5002 1 episodes - episode_reward: 253.194 [253.194, 253.194] - loss: 8.745 - mae: 46.783 - mean_q: 61.327 Interval 476 (237500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6088 1 episodes - episode_reward: 274.680 [274.680, 274.680] - loss: 7.359 - mae: 46.857 - mean_q: 61.616 Interval 477 (238000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0622 Interval 478 (238500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6867 2 episodes - episode_reward: 182.300 [180.767, 183.833] - loss: 7.510 - mae: 46.795 - mean_q: 61.254 Interval 479 (239000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1059 2 episodes - episode_reward: 49.075 [-130.003, 228.154] - loss: 7.737 - mae: 46.995 - mean_q: 61.628 Interval 480 (239500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4499 2 episodes - episode_reward: 88.180 [-110.063, 286.424] - loss: 6.834 - mae: 47.205 - mean_q: 61.719 Interval 481 (240000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.9132 2 episodes - episode_reward: 255.697 [247.874, 263.519] - loss: 6.703 - mae: 47.048 - mean_q: 61.789 Interval 482 (240500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7390 1 episodes - episode_reward: 299.348 [299.348, 299.348] - loss: 8.691 - mae: 47.315 - mean_q: 62.435 Interval 483 (241000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4021 2 episodes - episode_reward: 62.076 [-90.283, 
214.436] - loss: 7.610 - mae: 47.580 - mean_q: 62.384 Interval 484 (241500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6064 1 episodes - episode_reward: 244.595 [244.595, 244.595] - loss: 8.493 - mae: 48.149 - mean_q: 63.253 Interval 485 (242000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6036 1 episodes - episode_reward: 334.742 [334.742, 334.742] - loss: 6.539 - mae: 48.257 - mean_q: 62.988 Interval 486 (242500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4904 1 episodes - episode_reward: 280.699 [280.699, 280.699] - loss: 6.777 - mae: 48.303 - mean_q: 63.213 Interval 487 (243000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7419 2 episodes - episode_reward: 235.545 [235.403, 235.687] - loss: 9.489 - mae: 48.516 - mean_q: 63.516 Interval 488 (243500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1465 2 episodes - episode_reward: 48.608 [-74.907, 172.123] - loss: 7.326 - mae: 48.752 - mean_q: 63.962 Interval 489 (244000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8116 1 episodes - episode_reward: 213.065 [213.065, 213.065] - loss: 5.857 - mae: 48.741 - mean_q: 63.624 Interval 490 (244500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7139 5 episodes - episode_reward: -36.892 [-161.462, 305.789] - loss: 8.916 - mae: 48.897 - mean_q: 63.546 Interval 491 (245000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3635 1 episodes - episode_reward: 198.204 [198.204, 198.204] - loss: 9.167 - mae: 48.791 - mean_q: 63.233 Interval 492 (245500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4370 1 episodes - episode_reward: 207.605 [207.605, 207.605] - loss: 5.082 - mae: 48.860 - mean_q: 63.872 Interval 493 (246000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.1357 Interval 494 (246500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7191 2 episodes - episode_reward: 213.317 [190.080, 236.555] - loss: 7.102 - mae: 48.501 - mean_q: 63.256 Interval 495 (247000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4646 1 episodes - episode_reward: 243.077 [243.077, 243.077] - loss: 6.282 - mae: 49.049 - mean_q: 64.038 Interval 496 (247500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3032 2 episodes - episode_reward: 47.955 [-98.894, 194.805] - loss: 6.881 - mae: 48.939 - mean_q: 63.674 Interval 497 (248000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5806 1 episodes - episode_reward: 242.693 [242.693, 242.693] - loss: 6.702 - mae: 48.607 - mean_q: 63.574 Interval 498 (248500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4809 2 episodes - episode_reward: 122.768 [-32.853, 278.388] - loss: 7.934 - mae: 49.042 - mean_q: 64.161 Interval 499 (249000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8725 2 episodes - episode_reward: 274.304 [238.742, 309.866] - loss: 8.221 - mae: 48.955 - mean_q: 63.938 Interval 500 (249500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3582 2 episodes - episode_reward: 49.152 [-163.089, 261.393] - loss: 6.284 - mae: 48.709 - mean_q: 63.649 Interval 501 (250000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6066 1 episodes - episode_reward: 262.245 [262.245, 262.245] - loss: 6.389 - mae: 48.858 - mean_q: 64.002 Interval 502 (250500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4660 1 episodes - episode_reward: 232.837 [232.837, 232.837] - loss: 8.901 - mae: 48.396 - mean_q: 63.741 Interval 503 (251000 steps performed) 
500/500 [==============================] - 3s 5ms/step - reward: 0.5676 3 episodes - episode_reward: 127.225 [-105.219, 265.182] - loss: 7.660 - mae: 48.171 - mean_q: 63.219 Interval 504 (251500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3011 4 episodes - episode_reward: -31.037 [-152.519, 275.606] - loss: 7.497 - mae: 48.221 - mean_q: 63.442 Interval 505 (252000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0682 2 episodes - episode_reward: -17.966 [-322.135, 286.203] - loss: 6.418 - mae: 47.594 - mean_q: 62.430 Interval 506 (252500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6214 2 episodes - episode_reward: 93.224 [-100.000, 286.447] - loss: 7.328 - mae: 48.193 - mean_q: 63.095 Interval 507 (253000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4201 1 episodes - episode_reward: 303.877 [303.877, 303.877] - loss: 8.665 - mae: 47.973 - mean_q: 62.719 Interval 508 (253500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4688 1 episodes - episode_reward: 191.625 [191.625, 191.625] - loss: 7.984 - mae: 48.342 - mean_q: 63.225 Interval 509 (254000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2844 1 episodes - episode_reward: 165.732 [165.732, 165.732] - loss: 6.870 - mae: 47.791 - mean_q: 62.657 Interval 510 (254500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8290 3 episodes - episode_reward: 146.933 [-97.133, 302.029] - loss: 5.825 - mae: 47.761 - mean_q: 62.335 Interval 511 (255000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0160 2 episodes - episode_reward: 3.591 [-227.846, 235.028] - loss: 6.851 - mae: 47.737 - mean_q: 62.565 Interval 512 (255500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2625 Interval 513 (256000 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7002 2 episodes - episode_reward: 224.472 [206.319, 242.625] - loss: 6.690 - mae: 47.901 - mean_q: 63.004 Interval 514 (256500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5192 1 episodes - episode_reward: 207.127 [207.127, 207.127] - loss: 6.259 - mae: 48.018 - mean_q: 63.459 Interval 515 (257000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1933 1 episodes - episode_reward: 183.301 [183.301, 183.301] - loss: 6.546 - mae: 47.719 - mean_q: 62.779 Interval 516 (257500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6648 1 episodes - episode_reward: 191.420 [191.420, 191.420] - loss: 7.712 - mae: 47.662 - mean_q: 62.540 Interval 517 (258000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1364 3 episodes - episode_reward: 57.907 [-109.659, 295.071] - loss: 7.154 - mae: 47.859 - mean_q: 62.775 Interval 518 (258500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6337 1 episodes - episode_reward: 232.406 [232.406, 232.406] - loss: 8.460 - mae: 47.930 - mean_q: 62.730 Interval 519 (259000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6006 1 episodes - episode_reward: 252.982 [252.982, 252.982] - loss: 7.234 - mae: 48.139 - mean_q: 63.233 Interval 520 (259500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1021 4 episodes - episode_reward: 76.685 [-147.544, 282.323] - loss: 7.136 - mae: 48.682 - mean_q: 63.810 Interval 521 (260000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9737 1 episodes - episode_reward: 245.228 [245.228, 245.228] - loss: 6.323 - mae: 48.512 - mean_q: 63.635 Interval 522 (260500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7776 2 episodes 
- episode_reward: 256.424 [250.242, 262.605] - loss: 7.712 - mae: 48.899 - mean_q: 63.866 Interval 523 (261000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2046 Interval 524 (261500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9646 4 episodes - episode_reward: -93.997 [-279.289, 241.268] - loss: 7.562 - mae: 49.258 - mean_q: 64.535 Interval 525 (262000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8288 1 episodes - episode_reward: 276.894 [276.894, 276.894] - loss: 10.851 - mae: 49.746 - mean_q: 64.784 Interval 526 (262500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7235 2 episodes - episode_reward: 280.641 [235.004, 326.279] - loss: 8.444 - mae: 49.849 - mean_q: 65.223 Interval 527 (263000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4168 2 episodes - episode_reward: 38.385 [-115.109, 191.879] - loss: 7.327 - mae: 50.212 - mean_q: 65.451 Interval 528 (263500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3832 1 episodes - episode_reward: 210.882 [210.882, 210.882] - loss: 6.409 - mae: 49.833 - mean_q: 65.095 Interval 529 (264000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5738 1 episodes - episode_reward: 216.123 [216.123, 216.123] - loss: 8.276 - mae: 49.746 - mean_q: 64.907 Interval 530 (264500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6257 2 episodes - episode_reward: 246.464 [224.875, 268.052] - loss: 7.270 - mae: 49.915 - mean_q: 65.063 Interval 531 (265000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4926 1 episodes - episode_reward: 196.522 [196.522, 196.522] - loss: 7.093 - mae: 49.828 - mean_q: 64.853 Interval 532 (265500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
0.3771 1 episodes - episode_reward: 237.573 [237.573, 237.573] - loss: 6.353 - mae: 50.091 - mean_q: 65.335 Interval 533 (266000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2319 3 episodes - episode_reward: 28.030 [-99.090, 277.560] - loss: 6.958 - mae: 50.072 - mean_q: 64.959 Interval 534 (266500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3739 2 episodes - episode_reward: 107.058 [-22.412, 236.528] - loss: 7.015 - mae: 50.446 - mean_q: 65.576 Interval 535 (267000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3979 3 episodes - episode_reward: 29.736 [-118.335, 250.509] - loss: 11.071 - mae: 50.660 - mean_q: 65.380 Interval 536 (267500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4164 3 episodes - episode_reward: -39.766 [-162.744, 190.831] - loss: 6.158 - mae: 50.426 - mean_q: 65.528 Interval 537 (268000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4287 1 episodes - episode_reward: 223.305 [223.305, 223.305] - loss: 8.670 - mae: 50.530 - mean_q: 65.327 Interval 538 (268500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3048 1 episodes - episode_reward: 146.742 [146.742, 146.742] - loss: 6.225 - mae: 50.243 - mean_q: 65.369 Interval 539 (269000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6743 1 episodes - episode_reward: 224.740 [224.740, 224.740] - loss: 7.575 - mae: 50.522 - mean_q: 65.472 Interval 540 (269500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3306 4 episodes - episode_reward: 65.213 [-123.628, 289.508] - loss: 8.062 - mae: 50.488 - mean_q: 65.063 Interval 541 (270000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5840 2 episodes - episode_reward: 165.533 [13.333, 317.734] - loss: 7.164 - mae: 50.730 - 
mean_q: 65.063 Interval 542 (270500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1914 2 episodes - episode_reward: -34.157 [-88.035, 19.720] - loss: 12.043 - mae: 50.681 - mean_q: 65.447 Interval 543 (271000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5265 1 episodes - episode_reward: 238.183 [238.183, 238.183] - loss: 12.670 - mae: 51.465 - mean_q: 66.385 Interval 544 (271500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6926 2 episodes - episode_reward: 265.981 [256.219, 275.742] - loss: 9.524 - mae: 51.082 - mean_q: 66.197 Interval 545 (272000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0684 Interval 546 (272500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 1.1408 2 episodes - episode_reward: 230.162 [142.313, 318.011] - loss: 9.114 - mae: 51.057 - mean_q: 65.729 Interval 547 (273000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2678 2 episodes - episode_reward: 74.449 [-132.000, 280.897] - loss: 8.146 - mae: 51.132 - mean_q: 65.971 Interval 548 (273500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6679 2 episodes - episode_reward: 221.992 [219.915, 224.069] - loss: 10.232 - mae: 51.108 - mean_q: 65.911 Interval 549 (274000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8278 1 episodes - episode_reward: 254.313 [254.313, 254.313] - loss: 14.383 - mae: 51.313 - mean_q: 65.694 Interval 550 (274500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3645 5 episodes - episode_reward: -13.637 [-111.101, 258.241] - loss: 8.386 - mae: 50.789 - mean_q: 65.218 Interval 551 (275000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8002 2 episodes - episode_reward: 221.413 [194.133, 248.693] - loss: 8.386 - 
mae: 50.991 - mean_q: 65.586 Interval 552 (275500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6748 1 episodes - episode_reward: 215.179 [215.179, 215.179] - loss: 7.733 - mae: 50.727 - mean_q: 64.947 Interval 553 (276000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6753 2 episodes - episode_reward: 254.471 [231.484, 277.457] - loss: 8.618 - mae: 50.921 - mean_q: 65.382 Interval 554 (276500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4875 1 episodes - episode_reward: 211.505 [211.505, 211.505] - loss: 9.186 - mae: 51.492 - mean_q: 65.596 Interval 555 (277000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4484 2 episodes - episode_reward: 32.304 [-141.387, 205.996] - loss: 7.150 - mae: 51.887 - mean_q: 66.163 Interval 556 (277500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3149 3 episodes - episode_reward: 52.170 [-150.813, 244.686] - loss: 14.725 - mae: 51.599 - mean_q: 65.876 Interval 557 (278000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0788 Interval 558 (278500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1367 2 episodes - episode_reward: 65.664 [-71.051, 202.379] - loss: 10.893 - mae: 51.435 - mean_q: 66.040 Interval 559 (279000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5159 1 episodes - episode_reward: 258.550 [258.550, 258.550] - loss: 8.373 - mae: 51.969 - mean_q: 66.629 Interval 560 (279500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0812 3 episodes - episode_reward: 25.313 [-122.838, 234.652] - loss: 6.939 - mae: 51.818 - mean_q: 66.419 Interval 561 (280000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0539 4 episodes - episode_reward: -16.984 [-100.000, 232.064] - 
loss: 9.516 - mae: 51.840 - mean_q: 66.468 Interval 562 (280500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8683 2 episodes - episode_reward: 225.102 [223.581, 226.622] - loss: 7.883 - mae: 52.058 - mean_q: 66.970 Interval 563 (281000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 1.3079 2 episodes - episode_reward: 277.605 [249.790, 305.421] - loss: 7.522 - mae: 52.069 - mean_q: 67.088 Interval 564 (281500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6571 1 episodes - episode_reward: 246.928 [246.928, 246.928] - loss: 10.212 - mae: 52.372 - mean_q: 66.748 Interval 565 (282000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5739 1 episodes - episode_reward: 316.537 [316.537, 316.537] - loss: 9.819 - mae: 52.842 - mean_q: 67.372 Interval 566 (282500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3984 1 episodes - episode_reward: 281.870 [281.870, 281.870] - loss: 7.859 - mae: 53.163 - mean_q: 67.956 Interval 567 (283000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0152 2 episodes - episode_reward: 61.510 [-103.164, 226.183] - loss: 8.433 - mae: 53.355 - mean_q: 68.201 Interval 568 (283500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4294 2 episodes - episode_reward: 61.262 [-175.333, 297.858] - loss: 9.186 - mae: 53.809 - mean_q: 68.562 Interval 569 (284000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7029 4 episodes - episode_reward: 106.875 [-100.000, 278.833] - loss: 14.629 - mae: 54.214 - mean_q: 68.881 Interval 570 (284500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5571 1 episodes - episode_reward: 214.740 [214.740, 214.740] - loss: 13.203 - mae: 54.315 - mean_q: 69.364 Interval 571 (285000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.4279 1 episodes - episode_reward: 258.993 [258.993, 258.993] - loss: 10.518 - mae: 54.458 - mean_q: 69.284 Interval 572 (285500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6885 1 episodes - episode_reward: 194.530 [194.530, 194.530] - loss: 10.424 - mae: 54.883 - mean_q: 69.974 Interval 573 (286000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5460 1 episodes - episode_reward: 263.433 [263.433, 263.433] - loss: 10.294 - mae: 54.455 - mean_q: 69.476 Interval 574 (286500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1430 2 episodes - episode_reward: 80.471 [-100.000, 260.942] - loss: 10.494 - mae: 54.412 - mean_q: 69.715 Interval 575 (287000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4365 2 episodes - episode_reward: -207.572 [-299.652, -115.492] - loss: 9.333 - mae: 54.405 - mean_q: 69.046 Interval 576 (287500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1811 2 episodes - episode_reward: 32.201 [-161.363, 225.765] - loss: 14.445 - mae: 54.415 - mean_q: 69.711 Interval 577 (288000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3869 1 episodes - episode_reward: 155.087 [155.087, 155.087] - loss: 9.879 - mae: 54.514 - mean_q: 70.553 Interval 578 (288500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2437 2 episodes - episode_reward: -64.502 [-244.601, 115.597] - loss: 10.316 - mae: 54.420 - mean_q: 69.970 Interval 579 (289000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0230 2 episodes - episode_reward: -51.292 [-100.000, -2.583] - loss: 8.172 - mae: 54.515 - mean_q: 70.169 Interval 580 (289500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0873 Interval 581 (290000 
steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1113 1 episodes - episode_reward: 177.590 [177.590, 177.590] - loss: 12.965 - mae: 53.878 - mean_q: 69.624 Interval 582 (290500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8387 1 episodes - episode_reward: 222.155 [222.155, 222.155] - loss: 11.531 - mae: 53.568 - mean_q: 69.026 Interval 583 (291000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3100 1 episodes - episode_reward: 259.584 [259.584, 259.584] - loss: 10.404 - mae: 53.231 - mean_q: 68.218 Interval 584 (291500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2627 2 episodes - episode_reward: -76.554 [-117.307, -35.800] - loss: 9.881 - mae: 53.053 - mean_q: 68.179 Interval 585 (292000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3302 1 episodes - episode_reward: 190.203 [190.203, 190.203] - loss: 12.694 - mae: 52.348 - mean_q: 67.908 Interval 586 (292500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3301 3 episodes - episode_reward: 41.755 [-107.839, 226.207] - loss: 9.330 - mae: 52.199 - mean_q: 67.491 Interval 587 (293000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0282 2 episodes - episode_reward: 18.381 [-13.106, 49.868] - loss: 13.512 - mae: 51.799 - mean_q: 66.540 Interval 588 (293500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5396 1 episodes - episode_reward: 213.406 [213.406, 213.406] - loss: 11.786 - mae: 52.299 - mean_q: 67.110 Interval 589 (294000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2360 2 episodes - episode_reward: 115.419 [0.773, 230.065] - loss: 8.324 - mae: 51.791 - mean_q: 66.472 Interval 590 (294500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2734 1 
episodes - episode_reward: 194.396 [194.396, 194.396] - loss: 8.188 - mae: 51.492 - mean_q: 66.323 Interval 591 (295000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3049 2 episodes - episode_reward: 41.907 [-175.218, 259.032] - loss: 9.435 - mae: 51.278 - mean_q: 65.583 Interval 592 (295500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0127 1 episodes - episode_reward: -148.964 [-148.964, -148.964] - loss: 8.087 - mae: 51.505 - mean_q: 66.367 Interval 593 (296000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7966 2 episodes - episode_reward: 268.263 [253.130, 283.396] - loss: 12.766 - mae: 51.853 - mean_q: 66.246 Interval 594 (296500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4157 1 episodes - episode_reward: 252.515 [252.515, 252.515] - loss: 12.491 - mae: 51.653 - mean_q: 66.240 Interval 595 (297000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4304 1 episodes - episode_reward: 170.000 [170.000, 170.000] - loss: 6.989 - mae: 51.065 - mean_q: 65.424 Interval 596 (297500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1268 Interval 597 (298000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7469 2 episodes - episode_reward: 242.266 [208.045, 276.486] - loss: 7.289 - mae: 51.082 - mean_q: 66.128 Interval 598 (298500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1218 2 episodes - episode_reward: -54.511 [-93.311, -15.712] - loss: 15.462 - mae: 50.738 - mean_q: 64.844 Interval 599 (299000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6578 1 episodes - episode_reward: 241.752 [241.752, 241.752] - loss: 11.091 - mae: 50.983 - mean_q: 64.920 Interval 600 (299500 steps performed) 500/500 [==============================] - 3s 5ms/step - 
reward: 0.2918 1 episodes - episode_reward: 282.051 [282.051, 282.051] - loss: 11.099 - mae: 50.523 - mean_q: 65.025 Interval 601 (300000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2718 1 episodes - episode_reward: 146.385 [146.385, 146.385] - loss: 9.965 - mae: 50.268 - mean_q: 64.875 Interval 602 (300500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6151 1 episodes - episode_reward: 242.788 [242.788, 242.788] - loss: 9.063 - mae: 50.456 - mean_q: 64.983 Interval 603 (301000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0920 1 episodes - episode_reward: -76.461 [-76.461, -76.461] - loss: 8.523 - mae: 49.887 - mean_q: 65.107 Interval 604 (301500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4858 3 episodes - episode_reward: -32.662 [-272.708, 269.281] - loss: 13.125 - mae: 49.989 - mean_q: 65.156 Interval 605 (302000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5877 1 episodes - episode_reward: 215.230 [215.230, 215.230] - loss: 8.718 - mae: 50.064 - mean_q: 65.420 Interval 606 (302500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5035 1 episodes - episode_reward: 212.318 [212.318, 212.318] - loss: 11.433 - mae: 50.008 - mean_q: 64.998 Interval 607 (303000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3860 2 episodes - episode_reward: 111.967 [-44.653, 268.588] - loss: 8.055 - mae: 49.953 - mean_q: 65.180 Interval 608 (303500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5944 1 episodes - episode_reward: 252.360 [252.360, 252.360] - loss: 8.271 - mae: 49.865 - mean_q: 64.548 Interval 609 (304000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0047 Interval 610 (304500 steps performed) 500/500 [==============================] 
- 3s 6ms/step - reward: 0.3142 1 episodes - episode_reward: 244.810 [244.810, 244.810] - loss: 8.414 - mae: 49.348 - mean_q: 63.928 Interval 611 (305000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4777 1 episodes - episode_reward: 241.625 [241.625, 241.625] - loss: 8.523 - mae: 49.407 - mean_q: 64.175 Interval 612 (305500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4131 1 episodes - episode_reward: 244.727 [244.727, 244.727] - loss: 7.126 - mae: 49.384 - mean_q: 64.306 Interval 613 (306000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6463 1 episodes - episode_reward: 216.294 [216.294, 216.294] - loss: 6.390 - mae: 49.183 - mean_q: 63.861 Interval 614 (306500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0137 Interval 615 (307000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6090 1 episodes - episode_reward: 239.376 [239.376, 239.376] - loss: 6.165 - mae: 49.098 - mean_q: 64.060 Interval 616 (307500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1755 2 episodes - episode_reward: 99.521 [-127.594, 326.635] - loss: 8.235 - mae: 49.452 - mean_q: 63.730 Interval 617 (308000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4745 1 episodes - episode_reward: 253.110 [253.110, 253.110] - loss: 9.257 - mae: 49.062 - mean_q: 63.691 Interval 618 (308500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4549 1 episodes - episode_reward: 192.748 [192.748, 192.748] - loss: 8.704 - mae: 48.832 - mean_q: 63.422 Interval 619 (309000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9473 2 episodes - episode_reward: 257.612 [216.305, 298.919] - loss: 8.653 - mae: 48.866 - mean_q: 63.544 Interval 620 (309500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.1237 1 episodes - episode_reward: -106.486 [-106.486, -106.486] - loss: 9.708 - mae: 48.813 - mean_q: 63.475 Interval 621 (310000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4518 3 episodes - episode_reward: 113.716 [-96.103, 221.960] - loss: 7.429 - mae: 48.519 - mean_q: 63.368 Interval 622 (310500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5078 2 episodes - episode_reward: 90.083 [2.663, 177.503] - loss: 8.796 - mae: 48.999 - mean_q: 63.707 Interval 623 (311000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4663 1 episodes - episode_reward: 261.569 [261.569, 261.569] - loss: 5.991 - mae: 48.979 - mean_q: 63.732 Interval 624 (311500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6615 1 episodes - episode_reward: 243.794 [243.794, 243.794] - loss: 7.547 - mae: 49.027 - mean_q: 63.789 Interval 625 (312000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9222 2 episodes - episode_reward: 275.448 [248.410, 302.487] - loss: 7.663 - mae: 49.116 - mean_q: 64.005 Interval 626 (312500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5891 1 episodes - episode_reward: 243.109 [243.109, 243.109] - loss: 7.534 - mae: 49.538 - mean_q: 64.822 Interval 627 (313000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5947 1 episodes - episode_reward: 289.315 [289.315, 289.315] - loss: 7.396 - mae: 49.381 - mean_q: 64.731 Interval 628 (313500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0687 2 episodes - episode_reward: 67.730 [-95.258, 230.719] - loss: 9.357 - mae: 49.777 - mean_q: 65.219 Interval 629 (314000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4102 1 episodes - episode_reward: 192.367 
[192.367, 192.367] - loss: 7.616 - mae: 49.298 - mean_q: 64.282 Interval 630 (314500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7190 1 episodes - episode_reward: 298.919 [298.919, 298.919] - loss: 5.897 - mae: 49.112 - mean_q: 64.207 Interval 631 (315000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5269 1 episodes - episode_reward: 189.988 [189.988, 189.988] - loss: 7.861 - mae: 49.295 - mean_q: 64.287 Interval 632 (315500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3191 2 episodes - episode_reward: 68.075 [-115.775, 251.925] - loss: 9.879 - mae: 49.490 - mean_q: 64.346 Interval 633 (316000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5481 1 episodes - episode_reward: 276.301 [276.301, 276.301] - loss: 10.071 - mae: 49.476 - mean_q: 64.557 Interval 634 (316500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0281 2 episodes - episode_reward: 55.337 [-160.797, 271.470] - loss: 8.007 - mae: 49.670 - mean_q: 64.825 Interval 635 (317000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6647 1 episodes - episode_reward: 207.625 [207.625, 207.625] - loss: 6.418 - mae: 49.807 - mean_q: 65.080 Interval 636 (317500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3816 3 episodes - episode_reward: -16.344 [-224.148, 273.414] - loss: 9.128 - mae: 49.610 - mean_q: 64.862 Interval 637 (318000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6834 2 episodes - episode_reward: 185.714 [156.536, 214.893] - loss: 8.600 - mae: 49.658 - mean_q: 64.574 Interval 638 (318500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4613 2 episodes - episode_reward: 120.990 [-14.099, 256.078] - loss: 11.303 - mae: 49.797 - mean_q: 64.646 Interval 639 (319000 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2391 Interval 640 (319500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5674 2 episodes - episode_reward: 191.814 [175.413, 208.215] - loss: 9.293 - mae: 49.805 - mean_q: 64.867 Interval 641 (320000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2092 Interval 642 (320500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0112 Interval 643 (321000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0459 Interval 644 (321500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3367 1 episodes - episode_reward: 201.057 [201.057, 201.057] - loss: 11.088 - mae: 49.187 - mean_q: 64.184 Interval 645 (322000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9517 2 episodes - episode_reward: 254.188 [230.456, 277.920] - loss: 11.056 - mae: 49.043 - mean_q: 63.667 Interval 646 (322500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3378 4 episodes - episode_reward: 47.909 [-115.049, 213.222] - loss: 7.035 - mae: 49.109 - mean_q: 64.027 Interval 647 (323000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2717 2 episodes - episode_reward: 39.957 [-122.577, 202.492] - loss: 6.519 - mae: 49.456 - mean_q: 64.292 Interval 648 (323500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2579 1 episodes - episode_reward: 233.302 [233.302, 233.302] - loss: 6.816 - mae: 49.356 - mean_q: 64.432 Interval 649 (324000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6374 1 episodes - episode_reward: 188.575 [188.575, 188.575] - loss: 7.656 - mae: 49.314 - mean_q: 64.439 Interval 650 (324500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
0.7633 2 episodes - episode_reward: 243.466 [228.448, 258.484] - loss: 10.344 - mae: 49.342 - mean_q: 64.474 Interval 651 (325000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0874 3 episodes - episode_reward: -14.028 [-153.583, 255.165] - loss: 6.551 - mae: 49.079 - mean_q: 64.204 Interval 652 (325500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1782 2 episodes - episode_reward: 39.296 [-125.013, 203.605] - loss: 8.058 - mae: 49.265 - mean_q: 64.542 Interval 653 (326000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8044 2 episodes - episode_reward: 208.277 [173.034, 243.519] - loss: 7.860 - mae: 49.160 - mean_q: 64.239 Interval 654 (326500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6602 1 episodes - episode_reward: 242.784 [242.784, 242.784] - loss: 8.955 - mae: 48.576 - mean_q: 63.396 Interval 655 (327000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7926 2 episodes - episode_reward: 233.693 [223.742, 243.643] - loss: 9.633 - mae: 48.494 - mean_q: 63.209 Interval 656 (327500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2338 Interval 657 (328000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3930 1 episodes - episode_reward: 284.849 [284.849, 284.849] - loss: 7.719 - mae: 48.343 - mean_q: 62.968 Interval 658 (328500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7566 2 episodes - episode_reward: 213.110 [201.097, 225.123] - loss: 5.683 - mae: 48.067 - mean_q: 62.812 Interval 659 (329000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7248 3 episodes - episode_reward: 147.959 [-107.741, 298.992] - loss: 8.233 - mae: 47.854 - mean_q: 62.425 Interval 660 (329500 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: 0.5081 2 episodes - episode_reward: 64.663 [-110.169, 239.496] - loss: 6.158 - mae: 48.150 - mean_q: 62.839 Interval 661 (330000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5985 1 episodes - episode_reward: 295.682 [295.682, 295.682] - loss: 7.463 - mae: 48.248 - mean_q: 62.978 Interval 662 (330500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1124 3 episodes - episode_reward: 3.403 [-118.344, 228.542] - loss: 6.775 - mae: 48.076 - mean_q: 62.688 Interval 663 (331000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7108 1 episodes - episode_reward: 208.068 [208.068, 208.068] - loss: 6.722 - mae: 47.959 - mean_q: 62.250 Interval 664 (331500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2264 3 episodes - episode_reward: 107.715 [-101.891, 293.091] - loss: 6.901 - mae: 47.886 - mean_q: 62.170 Interval 665 (332000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6819 1 episodes - episode_reward: 294.522 [294.522, 294.522] - loss: 8.951 - mae: 47.785 - mean_q: 61.951 Interval 666 (332500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5703 1 episodes - episode_reward: 194.259 [194.259, 194.259] - loss: 6.721 - mae: 47.723 - mean_q: 61.911 Interval 667 (333000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4877 1 episodes - episode_reward: 251.372 [251.372, 251.372] - loss: 9.040 - mae: 47.820 - mean_q: 62.507 Interval 668 (333500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6391 2 episodes - episode_reward: 202.202 [189.407, 214.998] - loss: 6.121 - mae: 47.475 - mean_q: 61.860 Interval 669 (334000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6087 1 episodes - episode_reward: 233.690 [233.690, 233.690] - loss: 7.339 - 
mae: 47.858 - mean_q: 62.188 Interval 670 (334500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3772 1 episodes - episode_reward: 232.933 [232.933, 232.933] - loss: 9.639 - mae: 47.682 - mean_q: 61.977 Interval 671 (335000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6555 1 episodes - episode_reward: 213.673 [213.673, 213.673] - loss: 6.088 - mae: 47.857 - mean_q: 62.100 Interval 672 (335500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2395 1 episodes - episode_reward: 308.060 [308.060, 308.060] - loss: 8.272 - mae: 47.875 - mean_q: 62.380 Interval 673 (336000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1765 2 episodes - episode_reward: -142.366 [-188.084, -96.648] - loss: 10.187 - mae: 47.762 - mean_q: 61.883 Interval 674 (336500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6076 2 episodes - episode_reward: 239.911 [204.986, 274.836] - loss: 7.225 - mae: 47.963 - mean_q: 62.462 Interval 675 (337000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0701 2 episodes - episode_reward: -77.191 [-127.018, -27.364] - loss: 11.385 - mae: 48.090 - mean_q: 62.594 Interval 676 (337500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3254 1 episodes - episode_reward: 198.864 [198.864, 198.864] - loss: 7.834 - mae: 47.903 - mean_q: 62.372 Interval 677 (338000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6514 3 episodes - episode_reward: 133.943 [-119.197, 261.336] - loss: 8.256 - mae: 47.940 - mean_q: 62.661 Interval 678 (338500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7426 1 episodes - episode_reward: 244.385 [244.385, 244.385] - loss: 7.573 - mae: 48.162 - mean_q: 62.787 Interval 679 (339000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: 0.6784 3 episodes - episode_reward: 134.375 [-103.056, 282.843] - loss: 6.001 - mae: 47.975 - mean_q: 62.730 Interval 680 (339500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6333 1 episodes - episode_reward: 294.808 [294.808, 294.808] - loss: 8.584 - mae: 48.385 - mean_q: 63.318 Interval 681 (340000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0623 Interval 682 (340500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5258 1 episodes - episode_reward: 206.221 [206.221, 206.221] - loss: 7.294 - mae: 48.090 - mean_q: 63.045 Interval 683 (341000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8483 2 episodes - episode_reward: 274.753 [266.255, 283.252] - loss: 7.396 - mae: 48.008 - mean_q: 62.946 Interval 684 (341500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1749 Interval 685 (342000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4278 2 episodes - episode_reward: 69.925 [-99.985, 239.836] - loss: 5.648 - mae: 48.326 - mean_q: 63.282 Interval 686 (342500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7751 4 episodes - episode_reward: -47.102 [-304.516, 304.290] - loss: 7.974 - mae: 48.332 - mean_q: 63.210 Interval 687 (343000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7017 1 episodes - episode_reward: 216.175 [216.175, 216.175] - loss: 9.935 - mae: 48.192 - mean_q: 62.643 Interval 688 (343500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4395 3 episodes - episode_reward: 124.375 [-100.000, 238.211] - loss: 7.011 - mae: 48.035 - mean_q: 62.663 Interval 689 (344000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2299 3 episodes - 
episode_reward: 3.813 [-114.458, 236.915] - loss: 8.146 - mae: 48.056 - mean_q: 62.788 Interval 690 (344500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6154 2 episodes - episode_reward: 91.622 [-15.338, 198.581] - loss: 7.355 - mae: 48.087 - mean_q: 62.683 Interval 691 (345000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5756 2 episodes - episode_reward: 255.024 [195.596, 314.451] - loss: 6.095 - mae: 48.434 - mean_q: 62.911 Interval 692 (345500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1619 Interval 693 (346000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5147 1 episodes - episode_reward: 172.168 [172.168, 172.168] - loss: 9.286 - mae: 48.102 - mean_q: 62.295 Interval 694 (346500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4309 1 episodes - episode_reward: 257.409 [257.409, 257.409] - loss: 5.982 - mae: 48.167 - mean_q: 62.766 Interval 695 (347000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1594 2 episodes - episode_reward: 58.202 [-100.141, 216.544] - loss: 5.555 - mae: 48.170 - mean_q: 62.358 Interval 696 (347500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5917 1 episodes - episode_reward: 277.025 [277.025, 277.025] - loss: 6.495 - mae: 48.201 - mean_q: 62.632 Interval 697 (348000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8344 2 episodes - episode_reward: 213.579 [198.383, 228.776] - loss: 7.072 - mae: 48.440 - mean_q: 63.020 Interval 698 (348500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3978 2 episodes - episode_reward: 82.629 [-58.213, 223.471] - loss: 7.112 - mae: 48.644 - mean_q: 63.480 Interval 699 (349000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0802 3 
episodes - episode_reward: 50.221 [-55.135, 223.163] - loss: 8.239 - mae: 48.775 - mean_q: 63.509 Interval 700 (349500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6939 1 episodes - episode_reward: 244.271 [244.271, 244.271] - loss: 7.752 - mae: 48.605 - mean_q: 62.937 Interval 701 (350000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5047 2 episodes - episode_reward: 130.125 [-21.984, 282.233] - loss: 6.112 - mae: 48.961 - mean_q: 63.773 Interval 702 (350500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3176 1 episodes - episode_reward: 208.438 [208.438, 208.438] - loss: 8.083 - mae: 48.887 - mean_q: 63.875 Interval 703 (351000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7760 1 episodes - episode_reward: 283.840 [283.840, 283.840] - loss: 9.070 - mae: 48.968 - mean_q: 64.094 Interval 704 (351500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4993 3 episodes - episode_reward: 138.127 [-98.658, 260.857] - loss: 7.626 - mae: 49.059 - mean_q: 63.961 Interval 705 (352000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6044 1 episodes - episode_reward: 266.033 [266.033, 266.033] - loss: 8.654 - mae: 48.949 - mean_q: 63.947 Interval 706 (352500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2195 1 episodes - episode_reward: -156.256 [-156.256, -156.256] - loss: 6.171 - mae: 49.156 - mean_q: 64.566 Interval 707 (353000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4997 1 episodes - episode_reward: 171.118 [171.118, 171.118] - loss: 5.453 - mae: 48.849 - mean_q: 63.666 Interval 708 (353500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4887 1 episodes - episode_reward: 260.833 [260.833, 260.833] - loss: 7.813 - mae: 49.237 - mean_q: 
64.566 Interval 709 (354000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3180 2 episodes - episode_reward: 88.801 [-63.417, 241.019] - loss: 8.164 - mae: 49.167 - mean_q: 64.248 Interval 710 (354500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0272 Interval 711 (355000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0383 Interval 712 (355500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0751 2 episodes - episode_reward: 36.081 [-107.123, 179.284] - loss: 7.191 - mae: 48.794 - mean_q: 63.816 Interval 713 (356000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7374 1 episodes - episode_reward: 243.431 [243.431, 243.431] - loss: 9.690 - mae: 49.018 - mean_q: 64.032 Interval 714 (356500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4810 1 episodes - episode_reward: 237.037 [237.037, 237.037] - loss: 6.787 - mae: 49.157 - mean_q: 64.421 Interval 715 (357000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4694 2 episodes - episode_reward: 55.549 [-98.435, 209.534] - loss: 7.396 - mae: 49.630 - mean_q: 64.761 Interval 716 (357500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1651 2 episodes - episode_reward: 23.123 [-285.218, 331.464] - loss: 7.812 - mae: 49.727 - mean_q: 64.926 Interval 717 (358000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9746 2 episodes - episode_reward: 263.396 [236.902, 289.891] - loss: 7.395 - mae: 49.925 - mean_q: 65.257 Interval 718 (358500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4021 2 episodes - episode_reward: 149.531 [5.767, 293.296] - loss: 7.872 - mae: 50.230 - mean_q: 65.841 Interval 719 (359000 steps performed) 500/500 [==============================] - 3s 
6ms/step - reward: 0.2227 3 episodes - episode_reward: 34.831 [-100.000, 231.163] - loss: 7.603 - mae: 50.129 - mean_q: 65.017 Interval 720 (359500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7713 1 episodes - episode_reward: 243.404 [243.404, 243.404] - loss: 8.167 - mae: 50.473 - mean_q: 65.854 Interval 721 (360000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5183 1 episodes - episode_reward: 214.690 [214.690, 214.690] - loss: 9.767 - mae: 50.892 - mean_q: 66.572 Interval 722 (360500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0361 Interval 723 (361000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0392 Interval 724 (361500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6643 2 episodes - episode_reward: 223.428 [221.430, 225.426] - loss: 7.425 - mae: 50.578 - mean_q: 66.188 Interval 725 (362000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4207 2 episodes - episode_reward: 45.963 [-111.914, 203.839] - loss: 6.386 - mae: 50.630 - mean_q: 66.382 Interval 726 (362500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4314 1 episodes - episode_reward: 277.036 [277.036, 277.036] - loss: 5.548 - mae: 50.361 - mean_q: 66.193 Interval 727 (363000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1758 Interval 728 (363500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5436 1 episodes - episode_reward: 264.356 [264.356, 264.356] - loss: 8.389 - mae: 50.540 - mean_q: 66.325 Interval 729 (364000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7194 3 episodes - episode_reward: -59.570 [-609.227, 259.608] - loss: 9.680 - mae: 50.716 - mean_q: 66.463 Interval 730 (364500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.4308 Interval 731 (365000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4967 1 episodes - episode_reward: 293.419 [293.419, 293.419] - loss: 9.045 - mae: 51.142 - mean_q: 66.757 Interval 732 (365500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2715 4 episodes - episode_reward: -11.673 [-225.406, 248.908] - loss: 7.424 - mae: 50.566 - mean_q: 66.179 Interval 733 (366000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4765 1 episodes - episode_reward: 297.889 [297.889, 297.889] - loss: 7.255 - mae: 50.561 - mean_q: 66.292 Interval 734 (366500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1601 Interval 735 (367000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4711 3 episodes - episode_reward: 115.964 [-133.724, 301.354] - loss: 7.752 - mae: 50.509 - mean_q: 66.223 Interval 736 (367500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0784 1 episodes - episode_reward: -48.765 [-48.765, -48.765] - loss: 9.603 - mae: 50.490 - mean_q: 66.533 Interval 737 (368000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0572 1 episodes - episode_reward: 158.809 [158.809, 158.809] - loss: 10.601 - mae: 50.007 - mean_q: 65.778 Interval 738 (368500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2193 3 episodes - episode_reward: -42.423 [-238.981, 211.712] - loss: 7.694 - mae: 49.730 - mean_q: 65.542 Interval 739 (369000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4856 1 episodes - episode_reward: 181.012 [181.012, 181.012] - loss: 10.289 - mae: 49.421 - mean_q: 65.143 Interval 740 (369500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7422 1 episodes - 
episode_reward: 226.208 [226.208, 226.208] - loss: 10.091 - mae: 49.497 - mean_q: 65.026 Interval 741 (370000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4097 1 episodes - episode_reward: 312.853 [312.853, 312.853] - loss: 9.012 - mae: 49.271 - mean_q: 64.798 Interval 742 (370500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1511 Interval 743 (371000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1815 1 episodes - episode_reward: 215.594 [215.594, 215.594] - loss: 9.630 - mae: 49.296 - mean_q: 64.761 Interval 744 (371500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7493 1 episodes - episode_reward: 229.046 [229.046, 229.046] - loss: 9.979 - mae: 49.281 - mean_q: 64.877 Interval 745 (372000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4089 2 episodes - episode_reward: 92.179 [-53.436, 237.794] - loss: 9.801 - mae: 49.349 - mean_q: 65.084 Interval 746 (372500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4823 1 episodes - episode_reward: 305.953 [305.953, 305.953] - loss: 10.237 - mae: 49.857 - mean_q: 65.644 Interval 747 (373000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0440 Interval 748 (373500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0368 Interval 749 (374000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0673 Interval 750 (374500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.0180 Interval 751 (375000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0587 Interval 752 (375500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.0116 Interval 753 (376000 steps performed) 500/500 [==============================] - 
5s 10ms/step - reward: -0.1344 Interval 754 (376500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0318 Interval 755 (377000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.3301 1 episodes - episode_reward: 15.560 [15.560, 15.560] - loss: 7.887 - mae: 48.011 - mean_q: 63.344 Interval 756 (377500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0500 2 episodes - episode_reward: -46.471 [-47.908, -45.035] - loss: 7.513 - mae: 48.728 - mean_q: 64.361 Interval 757 (378000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2459 2 episodes - episode_reward: 74.554 [-134.316, 283.425] - loss: 8.376 - mae: 48.557 - mean_q: 64.134 Interval 758 (378500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0953 3 episodes - episode_reward: 33.529 [-72.305, 238.982] - loss: 7.636 - mae: 48.534 - mean_q: 63.736 Interval 759 (379000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3399 Interval 760 (379500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5041 1 episodes - episode_reward: 232.321 [232.321, 232.321] - loss: 9.252 - mae: 47.770 - mean_q: 62.889 Interval 761 (380000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4678 3 episodes - episode_reward: 144.579 [-100.585, 276.808] - loss: 8.110 - mae: 48.111 - mean_q: 63.414 Interval 762 (380500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5019 3 episodes - episode_reward: 69.629 [-37.815, 264.060] - loss: 10.152 - mae: 48.836 - mean_q: 64.371 Interval 763 (381000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1526 2 episodes - episode_reward: -39.395 [-68.322, -10.468] - loss: 14.127 - mae: 48.675 - mean_q: 64.406 Interval 764 (381500 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: 0.8282 1 episodes - episode_reward: 262.930 [262.930, 262.930] - loss: 9.086 - mae: 48.027 - mean_q: 63.361 Interval 765 (382000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4938 1 episodes - episode_reward: 253.933 [253.933, 253.933] - loss: 11.572 - mae: 47.749 - mean_q: 63.102 Interval 766 (382500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6380 2 episodes - episode_reward: 226.792 [209.852, 243.731] - loss: 11.667 - mae: 47.490 - mean_q: 62.837 Interval 767 (383000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4047 1 episodes - episode_reward: 229.347 [229.347, 229.347] - loss: 10.349 - mae: 47.416 - mean_q: 62.988 Interval 768 (383500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1105 3 episodes - episode_reward: -57.454 [-110.074, 32.603] - loss: 10.060 - mae: 47.434 - mean_q: 62.866 Interval 769 (384000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0384 3 episodes - episode_reward: 10.372 [-100.000, 207.839] - loss: 8.856 - mae: 47.769 - mean_q: 63.080 Interval 770 (384500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5026 1 episodes - episode_reward: 191.017 [191.017, 191.017] - loss: 9.954 - mae: 47.712 - mean_q: 63.283 Interval 771 (385000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.9075 2 episodes - episode_reward: 259.173 [256.241, 262.106] - loss: 12.003 - mae: 47.394 - mean_q: 62.798 Interval 772 (385500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4321 1 episodes - episode_reward: 324.088 [324.088, 324.088] - loss: 9.292 - mae: 47.156 - mean_q: 62.434 Interval 773 (386000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3055 3 episodes - episode_reward: 
14.312 [-250.027, 248.638] - loss: 9.733 - mae: 47.483 - mean_q: 63.080 Interval 774 (386500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1063 2 episodes - episode_reward: 60.594 [-111.858, 233.046] - loss: 12.757 - mae: 47.237 - mean_q: 62.541 Interval 775 (387000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0672 3 episodes - episode_reward: -174.983 [-271.467, -31.950] - loss: 9.748 - mae: 47.123 - mean_q: 62.580 Interval 776 (387500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2586 Interval 777 (388000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0376 Interval 778 (388500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3620 1 episodes - episode_reward: 215.676 [215.676, 215.676] - loss: 10.746 - mae: 46.227 - mean_q: 61.180 Interval 779 (389000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2045 2 episodes - episode_reward: 79.814 [-45.715, 205.343] - loss: 13.713 - mae: 46.247 - mean_q: 61.191 Interval 780 (389500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1566 2 episodes - episode_reward: 53.135 [-160.755, 267.025] - loss: 11.050 - mae: 45.871 - mean_q: 60.401 Interval 781 (390000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5431 1 episodes - episode_reward: 217.313 [217.313, 217.313] - loss: 10.621 - mae: 45.900 - mean_q: 60.534 Interval 782 (390500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4411 1 episodes - episode_reward: 188.048 [188.048, 188.048] - loss: 10.568 - mae: 45.775 - mean_q: 60.291 Interval 783 (391000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0114 Interval 784 (391500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0430 2 
episodes - episode_reward: 58.879 [-86.176, 203.934] - loss: 13.127 - mae: 44.782 - mean_q: 59.177 Interval 785 (392000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2109 2 episodes - episode_reward: 44.197 [-66.880, 155.274] - loss: 9.325 - mae: 45.403 - mean_q: 59.764 Interval 786 (392500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6212 2 episodes - episode_reward: 121.611 [-44.770, 287.991] - loss: 13.325 - mae: 44.755 - mean_q: 58.734 Interval 787 (393000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4487 2 episodes - episode_reward: 146.265 [-17.301, 309.830] - loss: 10.793 - mae: 44.000 - mean_q: 57.929 Interval 788 (393500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0803 2 episodes - episode_reward: -10.670 [-258.604, 237.263] - loss: 10.659 - mae: 43.790 - mean_q: 57.627 Interval 789 (394000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1639 1 episodes - episode_reward: -81.192 [-81.192, -81.192] - loss: 6.289 - mae: 43.551 - mean_q: 57.465 Interval 790 (394500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5859 2 episodes - episode_reward: 151.990 [51.746, 252.233] - loss: 11.547 - mae: 43.710 - mean_q: 57.347 Interval 791 (395000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3193 2 episodes - episode_reward: -7.911 [-264.975, 249.153] - loss: 9.711 - mae: 43.222 - mean_q: 56.576 Interval 792 (395500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6404 1 episodes - episode_reward: 209.544 [209.544, 209.544] - loss: 10.641 - mae: 43.751 - mean_q: 56.845 Interval 793 (396000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4658 2 episodes - episode_reward: 132.145 [-3.686, 267.976] - loss: 10.989 - mae: 43.384 - mean_q: 
56.584 Interval 794 (396500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6110 2 episodes - episode_reward: 155.549 [-23.448, 334.547] - loss: 11.076 - mae: 43.531 - mean_q: 56.966 Interval 795 (397000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2947 1 episodes - episode_reward: 219.421 [219.421, 219.421] - loss: 10.788 - mae: 43.574 - mean_q: 56.580 Interval 796 (397500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2245 2 episodes - episode_reward: -98.094 [-113.884, -82.304] - loss: 10.183 - mae: 43.169 - mean_q: 56.165 Interval 797 (398000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0227 Interval 798 (398500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1510 2 episodes - episode_reward: 42.108 [-119.415, 203.630] - loss: 11.530 - mae: 42.365 - mean_q: 55.392 Interval 799 (399000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4773 1 episodes - episode_reward: 236.443 [236.443, 236.443] - loss: 13.782 - mae: 42.178 - mean_q: 55.173 Interval 800 (399500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3321 1 episodes - episode_reward: 158.950 [158.950, 158.950] - loss: 13.292 - mae: 42.314 - mean_q: 55.415 Interval 801 (400000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5485 1 episodes - episode_reward: 206.481 [206.481, 206.481] - loss: 12.389 - mae: 42.236 - mean_q: 55.294 Interval 802 (400500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1919 5 episodes - episode_reward: 12.684 [-177.045, 266.872] - loss: 11.700 - mae: 42.130 - mean_q: 55.117 Interval 803 (401000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3809 1 episodes - episode_reward: 246.068 [246.068, 246.068] - loss: 15.675 - 
mae: 42.140 - mean_q: 54.715 Interval 804 (401500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3282 2 episodes - episode_reward: 39.907 [-169.276, 249.089] - loss: 12.281 - mae: 42.126 - mean_q: 54.390 Interval 805 (402000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0148 1 episodes - episode_reward: -134.060 [-134.060, -134.060] - loss: 12.577 - mae: 41.703 - mean_q: 53.967 Interval 806 (402500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3927 1 episodes - episode_reward: 273.856 [273.856, 273.856] - loss: 9.818 - mae: 41.914 - mean_q: 54.377 Interval 807 (403000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7307 2 episodes - episode_reward: 237.490 [217.543, 257.438] - loss: 9.301 - mae: 42.125 - mean_q: 54.497 Interval 808 (403500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6906 1 episodes - episode_reward: 291.499 [291.499, 291.499] - loss: 10.873 - mae: 42.173 - mean_q: 54.627 Interval 809 (404000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4951 1 episodes - episode_reward: 215.061 [215.061, 215.061] - loss: 14.243 - mae: 42.385 - mean_q: 54.707 Interval 810 (404500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5372 1 episodes - episode_reward: 210.706 [210.706, 210.706] - loss: 10.547 - mae: 42.039 - mean_q: 54.340 Interval 811 (405000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3899 1 episodes - episode_reward: 230.506 [230.506, 230.506] - loss: 11.884 - mae: 43.061 - mean_q: 55.482 Interval 812 (405500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5920 2 episodes - episode_reward: 172.946 [164.456, 181.437] - loss: 8.763 - mae: 42.972 - mean_q: 55.662 Interval 813 (406000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.0589 2 episodes - episode_reward: 6.636 [-217.010, 230.281] - loss: 8.478 - mae: 43.059 - mean_q: 56.029 Interval 814 (406500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0251 2 episodes - episode_reward: -2.802 [-36.468, 30.863] - loss: 8.641 - mae: 43.347 - mean_q: 56.182 Interval 815 (407000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7350 1 episodes - episode_reward: 207.324 [207.324, 207.324] - loss: 11.571 - mae: 43.532 - mean_q: 56.454 Interval 816 (407500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5106 1 episodes - episode_reward: 283.295 [283.295, 283.295] - loss: 10.323 - mae: 43.544 - mean_q: 56.582 Interval 817 (408000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6914 2 episodes - episode_reward: 246.526 [239.198, 253.855] - loss: 11.669 - mae: 43.461 - mean_q: 56.493 Interval 818 (408500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3986 Interval 819 (409000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4703 1 episodes - episode_reward: 308.857 [308.857, 308.857] - loss: 10.493 - mae: 43.471 - mean_q: 56.546 Interval 820 (409500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5053 1 episodes - episode_reward: 235.904 [235.904, 235.904] - loss: 8.649 - mae: 43.312 - mean_q: 56.523 Interval 821 (410000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3142 4 episodes - episode_reward: 70.061 [-138.084, 264.829] - loss: 11.529 - mae: 43.773 - mean_q: 56.698 Interval 822 (410500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1035 Interval 823 (411000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2993 1 episodes - 
episode_reward: 215.908 [215.908, 215.908] - loss: 12.173 - mae: 44.118 - mean_q: 56.988 Interval 824 (411500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5753 5 episodes - episode_reward: -54.994 [-150.655, 202.323] - loss: 11.579 - mae: 44.743 - mean_q: 57.680 Interval 825 (412000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2322 Interval 826 (412500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3999 1 episodes - episode_reward: 212.554 [212.554, 212.554] - loss: 9.027 - mae: 44.990 - mean_q: 58.405 Interval 827 (413000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0921 3 episodes - episode_reward: 19.757 [-121.622, 184.079] - loss: 8.997 - mae: 45.039 - mean_q: 58.389 Interval 828 (413500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0469 Interval 829 (414000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2414 1 episodes - episode_reward: 154.923 [154.923, 154.923] - loss: 11.691 - mae: 45.079 - mean_q: 58.613 Interval 830 (414500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0360 Interval 831 (415000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1891 1 episodes - episode_reward: -260.009 [-260.009, -260.009] - loss: 11.422 - mae: 44.903 - mean_q: 58.385 Interval 832 (415500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5627 1 episodes - episode_reward: 250.668 [250.668, 250.668] - loss: 10.048 - mae: 45.152 - mean_q: 58.596 Interval 833 (416000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4345 2 episodes - episode_reward: 111.256 [-69.674, 292.187] - loss: 12.354 - mae: 45.222 - mean_q: 58.778 Interval 834 (416500 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: -0.0594 2 episodes - episode_reward: 16.732 [-244.093, 277.557] - loss: 10.278 - mae: 45.300 - mean_q: 59.121 Interval 835 (417000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9950 2 episodes - episode_reward: 236.386 [197.163, 275.609] - loss: 14.494 - mae: 45.376 - mean_q: 58.838 Interval 836 (417500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0788 2 episodes - episode_reward: 103.967 [-20.334, 228.269] - loss: 10.663 - mae: 45.759 - mean_q: 59.400 Interval 837 (418000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0184 3 episodes - episode_reward: -6.701 [-133.890, 212.682] - loss: 9.135 - mae: 45.558 - mean_q: 58.561 Interval 838 (418500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9574 3 episodes - episode_reward: 126.352 [-100.000, 256.218] - loss: 10.491 - mae: 46.048 - mean_q: 59.780 Interval 839 (419000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6028 1 episodes - episode_reward: 308.953 [308.953, 308.953] - loss: 10.126 - mae: 46.177 - mean_q: 60.081 Interval 840 (419500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0389 1 episodes - episode_reward: -34.574 [-34.574, -34.574] - loss: 11.148 - mae: 46.154 - mean_q: 59.544 Interval 841 (420000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1969 1 episodes - episode_reward: 210.345 [210.345, 210.345] - loss: 9.078 - mae: 46.147 - mean_q: 59.432 Interval 842 (420500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1618 Interval 843 (421000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3289 2 episodes - episode_reward: -137.694 [-292.046, 16.657] - loss: 13.274 - mae: 46.343 - mean_q: 59.789 Interval 844 (421500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.5659 1 episodes - episode_reward: 250.703 [250.703, 250.703] - loss: 13.560 - mae: 46.514 - mean_q: 60.032 Interval 845 (422000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7017 4 episodes - episode_reward: -85.761 [-144.805, -26.984] - loss: 11.099 - mae: 46.076 - mean_q: 59.484 Interval 846 (422500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2691 2 episodes - episode_reward: -35.028 [-38.638, -31.419] - loss: 13.395 - mae: 45.745 - mean_q: 59.093 Interval 847 (423000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6396 1 episodes - episode_reward: 210.204 [210.204, 210.204] - loss: 12.347 - mae: 46.151 - mean_q: 59.681 Interval 848 (423500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5033 1 episodes - episode_reward: 198.023 [198.023, 198.023] - loss: 11.611 - mae: 46.517 - mean_q: 59.874 Interval 849 (424000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0336 3 episodes - episode_reward: 32.293 [-114.126, 250.316] - loss: 11.925 - mae: 46.279 - mean_q: 59.714 Interval 850 (424500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4518 2 episodes - episode_reward: 76.164 [-112.446, 264.775] - loss: 11.258 - mae: 46.594 - mean_q: 59.935 Interval 851 (425000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0315 Interval 852 (425500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0876 2 episodes - episode_reward: 88.583 [-72.606, 249.772] - loss: 10.663 - mae: 46.772 - mean_q: 60.427 Interval 853 (426000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6859 1 episodes - episode_reward: -299.312 [-299.312, -299.312] - loss: 11.094 - mae: 46.658 - mean_q: 59.942 Interval 854 (426500 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0867 2 episodes - episode_reward: -79.434 [-109.301, -49.567] - loss: 12.513 - mae: 46.833 - mean_q: 60.062 Interval 855 (427000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2691 1 episodes - episode_reward: 216.302 [216.302, 216.302] - loss: 11.396 - mae: 47.205 - mean_q: 60.739 Interval 856 (427500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3905 1 episodes - episode_reward: 239.441 [239.441, 239.441] - loss: 11.632 - mae: 46.860 - mean_q: 59.765 Interval 857 (428000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2716 1 episodes - episode_reward: -72.860 [-72.860, -72.860] - loss: 12.698 - mae: 46.644 - mean_q: 59.653 Interval 858 (428500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4555 1 episodes - episode_reward: 282.155 [282.155, 282.155] - loss: 13.059 - mae: 46.710 - mean_q: 60.025 Interval 859 (429000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3502 1 episodes - episode_reward: 227.830 [227.830, 227.830] - loss: 11.222 - mae: 46.678 - mean_q: 59.807 Interval 860 (429500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0607 2 episodes - episode_reward: -73.454 [-327.618, 180.709] - loss: 10.244 - mae: 46.689 - mean_q: 59.588 Interval 861 (430000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2274 3 episodes - episode_reward: 61.780 [-126.358, 293.395] - loss: 11.852 - mae: 46.805 - mean_q: 59.976 Interval 862 (430500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8282 2 episodes - episode_reward: 264.541 [257.075, 272.008] - loss: 10.988 - mae: 46.612 - mean_q: 59.911 Interval 863 (431000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
0.0771 Interval 864 (431500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5020 1 episodes - episode_reward: 153.195 [153.195, 153.195] - loss: 11.520 - mae: 46.486 - mean_q: 59.578 Interval 865 (432000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4150 3 episodes - episode_reward: 113.313 [-176.575, 259.495] - loss: 10.761 - mae: 46.429 - mean_q: 58.912 Interval 866 (432500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2333 Interval 867 (433000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4844 1 episodes - episode_reward: 225.494 [225.494, 225.494] - loss: 10.508 - mae: 46.329 - mean_q: 58.633 Interval 868 (433500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7173 2 episodes - episode_reward: 237.516 [232.723, 242.309] - loss: 12.901 - mae: 46.370 - mean_q: 58.937 Interval 869 (434000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4724 2 episodes - episode_reward: 131.083 [-4.767, 266.933] - loss: 14.130 - mae: 46.401 - mean_q: 59.333 Interval 870 (434500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6006 1 episodes - episode_reward: 221.216 [221.216, 221.216] - loss: 10.459 - mae: 46.640 - mean_q: 59.389 Interval 871 (435000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0090 Interval 872 (435500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4338 2 episodes - episode_reward: 41.733 [-100.000, 183.465] - loss: 11.613 - mae: 47.060 - mean_q: 59.923 Interval 873 (436000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1185 3 episodes - episode_reward: 22.683 [-201.386, 316.847] - loss: 9.682 - mae: 46.907 - mean_q: 59.108 Interval 874 (436500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: 0.4012 1 episodes - episode_reward: 284.859 [284.859, 284.859] - loss: 10.881 - mae: 47.046 - mean_q: 59.801 Interval 875 (437000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1665 1 episodes - episode_reward: -55.332 [-55.332, -55.332] - loss: 10.998 - mae: 47.004 - mean_q: 60.021 Interval 876 (437500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4587 1 episodes - episode_reward: 243.871 [243.871, 243.871] - loss: 10.944 - mae: 47.532 - mean_q: 60.456 Interval 877 (438000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4469 4 episodes - episode_reward: 63.594 [-109.815, 235.237] - loss: 9.159 - mae: 47.602 - mean_q: 61.297 Interval 878 (438500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8845 2 episodes - episode_reward: 251.451 [235.231, 267.670] - loss: 12.635 - mae: 47.541 - mean_q: 60.614 Interval 879 (439000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3053 3 episodes - episode_reward: 49.593 [-121.563, 298.320] - loss: 12.714 - mae: 47.646 - mean_q: 60.939 Interval 880 (439500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4636 2 episodes - episode_reward: 76.630 [-108.843, 262.104] - loss: 12.678 - mae: 48.018 - mean_q: 61.725 Interval 881 (440000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5155 1 episodes - episode_reward: 245.290 [245.290, 245.290] - loss: 11.568 - mae: 48.016 - mean_q: 61.151 Interval 882 (440500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3285 2 episodes - episode_reward: 54.853 [-122.929, 232.635] - loss: 11.529 - mae: 48.217 - mean_q: 61.790 Interval 883 (441000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2444 4 episodes - episode_reward: 
80.663 [-108.633, 287.295] - loss: 11.995 - mae: 48.369 - mean_q: 61.739 Interval 884 (441500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0096 2 episodes - episode_reward: -6.173 [-246.011, 233.664] - loss: 12.835 - mae: 48.257 - mean_q: 61.932 Interval 885 (442000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2452 2 episodes - episode_reward: 85.665 [-88.492, 259.823] - loss: 11.001 - mae: 48.500 - mean_q: 61.937 Interval 886 (442500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1375 Interval 887 (443000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3124 Interval 888 (443500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1498 4 episodes - episode_reward: -54.192 [-232.304, 245.738] - loss: 11.335 - mae: 48.508 - mean_q: 61.919 Interval 889 (444000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2351 Interval 890 (444500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 1.0769 2 episodes - episode_reward: 245.850 [225.503, 266.197] - loss: 12.459 - mae: 48.416 - mean_q: 61.430 Interval 891 (445000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5987 1 episodes - episode_reward: 325.402 [325.402, 325.402] - loss: 7.841 - mae: 48.744 - mean_q: 61.742 Interval 892 (445500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4259 1 episodes - episode_reward: 246.822 [246.822, 246.822] - loss: 11.848 - mae: 48.635 - mean_q: 61.998 Interval 893 (446000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0299 2 episodes - episode_reward: 53.431 [-109.203, 216.065] - loss: 11.553 - mae: 48.692 - mean_q: 62.053 Interval 894 (446500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0595 
3 episodes - episode_reward: 2.871 [-117.528, 226.142] - loss: 14.128 - mae: 48.629 - mean_q: 61.891 Interval 895 (447000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5415 1 episodes - episode_reward: 208.331 [208.331, 208.331] - loss: 12.044 - mae: 48.568 - mean_q: 61.455 Interval 896 (447500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2461 2 episodes - episode_reward: 85.477 [-31.459, 202.413] - loss: 10.909 - mae: 48.628 - mean_q: 61.581 Interval 897 (448000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3389 2 episodes - episode_reward: 82.021 [-98.080, 262.122] - loss: 10.379 - mae: 48.875 - mean_q: 61.833 Interval 898 (448500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2317 Interval 899 (449000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5293 2 episodes - episode_reward: 201.803 [184.558, 219.048] - loss: 8.585 - mae: 49.238 - mean_q: 62.519 Interval 900 (449500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4455 1 episodes - episode_reward: 208.908 [208.908, 208.908] - loss: 9.120 - mae: 49.043 - mean_q: 62.749 Interval 901 (450000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7599 1 episodes - episode_reward: 250.191 [250.191, 250.191] - loss: 10.065 - mae: 49.308 - mean_q: 62.770 Interval 902 (450500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0686 Interval 903 (451000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4979 1 episodes - episode_reward: 183.300 [183.300, 183.300] - loss: 10.618 - mae: 49.010 - mean_q: 62.576 Interval 904 (451500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7241 2 episodes - episode_reward: 256.149 [246.178, 266.120] - loss: 11.584 - mae: 48.614 - 
mean_q: 62.141 Interval 905 (452000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1579 Interval 906 (452500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1735 2 episodes - episode_reward: -16.550 [-318.835, 285.736] - loss: 9.302 - mae: 48.709 - mean_q: 62.146 Interval 907 (453000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6466 1 episodes - episode_reward: 272.885 [272.885, 272.885] - loss: 10.915 - mae: 48.942 - mean_q: 63.217 Interval 908 (453500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1431 2 episodes - episode_reward: 71.563 [-100.000, 243.125] - loss: 11.113 - mae: 48.891 - mean_q: 62.766 Interval 909 (454000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5644 3 episodes - episode_reward: -106.094 [-176.905, -4.762] - loss: 11.941 - mae: 48.432 - mean_q: 62.521 Interval 910 (454500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0394 2 episodes - episode_reward: 21.923 [-114.601, 158.446] - loss: 12.332 - mae: 48.480 - mean_q: 62.184 Interval 911 (455000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3682 1 episodes - episode_reward: -267.858 [-267.858, -267.858] - loss: 12.292 - mae: 48.713 - mean_q: 62.570 Interval 912 (455500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1468 Interval 913 (456000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1474 Interval 914 (456500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0522 Interval 915 (457000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2815 1 episodes - episode_reward: -16.179 [-16.179, -16.179] - loss: 9.604 - mae: 49.037 - mean_q: 63.364 Interval 916 (457500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.1934 Interval 917 (458000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0968 2 episodes - episode_reward: -69.607 [-237.405, 98.191] - loss: 9.252 - mae: 48.759 - mean_q: 62.665 Interval 918 (458500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5019 1 episodes - episode_reward: 274.948 [274.948, 274.948] - loss: 11.397 - mae: 49.098 - mean_q: 62.476 Interval 919 (459000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4898 1 episodes - episode_reward: 187.082 [187.082, 187.082] - loss: 9.913 - mae: 48.503 - mean_q: 62.361 Interval 920 (459500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5687 2 episodes - episode_reward: 193.418 [182.177, 204.658] - loss: 10.556 - mae: 48.674 - mean_q: 62.760 Interval 921 (460000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1051 3 episodes - episode_reward: -26.978 [-172.602, 188.815] - loss: 10.590 - mae: 49.008 - mean_q: 62.802 Interval 922 (460500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5630 1 episodes - episode_reward: 246.004 [246.004, 246.004] - loss: 11.487 - mae: 49.046 - mean_q: 62.569 Interval 923 (461000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2860 2 episodes - episode_reward: 30.884 [-139.800, 201.569] - loss: 10.191 - mae: 49.004 - mean_q: 62.946 Interval 924 (461500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6543 2 episodes - episode_reward: 268.766 [236.034, 301.497] - loss: 12.361 - mae: 50.153 - mean_q: 64.059 Interval 925 (462000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0865 3 episodes - episode_reward: -38.631 [-164.525, 206.500] - loss: 16.341 - mae: 50.034 - mean_q: 63.688 Interval 926 (462500 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4012 1 episodes - episode_reward: 277.427 [277.427, 277.427] - loss: 9.252 - mae: 49.548 - mean_q: 63.476 Interval 927 (463000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0515 Interval 928 (463500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3383 2 episodes - episode_reward: 61.177 [-13.002, 135.356] - loss: 13.662 - mae: 49.767 - mean_q: 63.774 Interval 929 (464000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0078 2 episodes - episode_reward: 34.619 [-134.290, 203.528] - loss: 9.741 - mae: 49.986 - mean_q: 63.889 Interval 930 (464500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2758 2 episodes - episode_reward: 59.509 [-100.000, 219.018] - loss: 10.195 - mae: 49.225 - mean_q: 62.817 Interval 931 (465000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1289 3 episodes - episode_reward: -167.244 [-279.215, -110.771] - loss: 11.390 - mae: 49.692 - mean_q: 63.612 Interval 932 (465500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4544 1 episodes - episode_reward: 122.838 [122.838, 122.838] - loss: 11.805 - mae: 49.862 - mean_q: 63.860 Interval 933 (466000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6613 2 episodes - episode_reward: 227.589 [214.447, 240.731] - loss: 10.292 - mae: 49.238 - mean_q: 62.817 Interval 934 (466500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9886 1 episodes - episode_reward: 266.442 [266.442, 266.442] - loss: 10.802 - mae: 49.517 - mean_q: 63.321 Interval 935 (467000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1800 Interval 936 (467500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -0.5332 1 episodes - episode_reward: -242.030 [-242.030, -242.030] - loss: 11.239 - mae: 49.080 - mean_q: 62.877 Interval 937 (468000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4933 1 episodes - episode_reward: 272.947 [272.947, 272.947] - loss: 11.781 - mae: 49.586 - mean_q: 63.234 Interval 938 (468500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1051 2 episodes - episode_reward: -9.234 [-225.720, 207.252] - loss: 12.883 - mae: 49.465 - mean_q: 63.034 Interval 939 (469000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1798 1 episodes - episode_reward: -44.968 [-44.968, -44.968] - loss: 12.688 - mae: 49.157 - mean_q: 63.013 Interval 940 (469500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0507 3 episodes - episode_reward: -6.038 [-137.397, 230.831] - loss: 9.623 - mae: 49.308 - mean_q: 63.110 Interval 941 (470000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2473 1 episodes - episode_reward: 215.169 [215.169, 215.169] - loss: 12.793 - mae: 49.029 - mean_q: 62.200 Interval 942 (470500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.6438 1 episodes - episode_reward: 279.032 [279.032, 279.032] - loss: 11.502 - mae: 49.326 - mean_q: 62.958 Interval 943 (471000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5280 1 episodes - episode_reward: 207.956 [207.956, 207.956] - loss: 10.364 - mae: 49.345 - mean_q: 63.142 Interval 944 (471500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1781 1 episodes - episode_reward: 204.770 [204.770, 204.770] - loss: 10.313 - mae: 49.477 - mean_q: 63.558 Interval 945 (472000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4618 2 episodes - episode_reward: 138.746 [30.807, 246.685] - loss: 11.769 - 
mae: 49.476 - mean_q: 63.622 Interval 946 (472500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7495 1 episodes - episode_reward: 240.015 [240.015, 240.015] - loss: 9.287 - mae: 49.441 - mean_q: 63.799 Interval 947 (473000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 1.0071 2 episodes - episode_reward: 251.239 [251.146, 251.333] - loss: 14.560 - mae: 49.313 - mean_q: 63.053 Interval 948 (473500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1246 1 episodes - episode_reward: 227.530 [227.530, 227.530] - loss: 10.954 - mae: 48.938 - mean_q: 63.103 Interval 949 (474000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4014 2 episodes - episode_reward: 58.078 [-156.230, 272.386] - loss: 10.665 - mae: 49.665 - mean_q: 63.718 Interval 950 (474500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6448 1 episodes - episode_reward: 198.554 [198.554, 198.554] - loss: 13.165 - mae: 49.659 - mean_q: 63.388 Interval 951 (475000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7516 2 episodes - episode_reward: 241.182 [225.702, 256.662] - loss: 8.248 - mae: 49.843 - mean_q: 63.648 Interval 952 (475500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0859 Interval 953 (476000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3774 2 episodes - episode_reward: 115.232 [46.860, 183.605] - loss: 10.250 - mae: 50.458 - mean_q: 64.917 Interval 954 (476500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1962 1 episodes - episode_reward: 185.352 [185.352, 185.352] - loss: 8.949 - mae: 50.656 - mean_q: 65.444 Interval 955 (477000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5330 2 episodes - episode_reward: 96.921 [-12.525, 206.366] - 
loss: 8.205 - mae: 50.619 - mean_q: 65.420 Interval 956 (477500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4196 1 episodes - episode_reward: 255.692 [255.692, 255.692] - loss: 11.791 - mae: 51.021 - mean_q: 66.207 Interval 957 (478000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1615 2 episodes - episode_reward: -9.957 [-22.393, 2.479] - loss: 9.414 - mae: 51.343 - mean_q: 66.436 Interval 958 (478500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2446 2 episodes - episode_reward: -31.358 [-74.938, 12.221] - loss: 14.120 - mae: 51.886 - mean_q: 67.037 Interval 959 (479000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1374 1 episodes - episode_reward: 204.534 [204.534, 204.534] - loss: 13.471 - mae: 51.868 - mean_q: 67.279 Interval 960 (479500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0855 Interval 961 (480000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2315 Interval 962 (480500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2908 Interval 963 (481000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0429 2 episodes - episode_reward: -148.371 [-184.516, -112.227] - loss: 8.440 - mae: 51.599 - mean_q: 66.928 Interval 964 (481500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0146 1 episodes - episode_reward: -222.940 [-222.940, -222.940] - loss: 13.072 - mae: 51.582 - mean_q: 66.512 Interval 965 (482000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2779 2 episodes - episode_reward: 88.817 [-123.200, 300.835] - loss: 10.726 - mae: 51.816 - mean_q: 66.693 Interval 966 (482500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3408 1 episodes - 
episode_reward: 257.039 [257.039, 257.039] - loss: 10.137 - mae: 51.729 - mean_q: 66.742 Interval 967 (483000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0286 2 episodes - episode_reward: 66.656 [-105.815, 239.127] - loss: 8.958 - mae: 51.769 - mean_q: 66.732 Interval 968 (483500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3538 1 episodes - episode_reward: -52.361 [-52.361, -52.361] - loss: 15.128 - mae: 51.939 - mean_q: 67.094 Interval 969 (484000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6788 2 episodes - episode_reward: 270.314 [265.028, 275.601] - loss: 11.979 - mae: 51.518 - mean_q: 66.470 Interval 970 (484500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3129 2 episodes - episode_reward: -346.694 [-555.186, -138.201] - loss: 9.638 - mae: 51.724 - mean_q: 66.648 Interval 971 (485000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7029 1 episodes - episode_reward: 200.088 [200.088, 200.088] - loss: 17.447 - mae: 51.337 - mean_q: 65.676 Interval 972 (485500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7225 3 episodes - episode_reward: -82.641 [-273.423, 264.944] - loss: 13.698 - mae: 51.577 - mean_q: 66.399 Interval 973 (486000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0295 Interval 974 (486500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1948 Interval 975 (487000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1496 Interval 976 (487500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1670 Interval 977 (488000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2440 Interval 978 (488500 steps performed) 500/500 [==============================] 
- 4s 8ms/step - reward: -0.0084 1 episodes - episode_reward: -282.527 [-282.527, -282.527] - loss: 11.452 - mae: 50.152 - mean_q: 64.323 Interval 979 (489000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2800 2 episodes - episode_reward: 22.724 [-150.415, 195.863] - loss: 10.465 - mae: 50.761 - mean_q: 65.159 Interval 980 (489500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3349 1 episodes - episode_reward: 163.373 [163.373, 163.373] - loss: 13.956 - mae: 50.969 - mean_q: 65.362 Interval 981 (490000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1264 Interval 982 (490500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2318 1 episodes - episode_reward: 174.694 [174.694, 174.694] - loss: 11.318 - mae: 51.108 - mean_q: 65.617 Interval 983 (491000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2550 1 episodes - episode_reward: 134.021 [134.021, 134.021] - loss: 13.879 - mae: 50.998 - mean_q: 65.663 Interval 984 (491500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5141 1 episodes - episode_reward: 196.674 [196.674, 196.674] - loss: 14.392 - mae: 51.112 - mean_q: 66.097 Interval 985 (492000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4095 2 episodes - episode_reward: 116.222 [-16.118, 248.562] - loss: 10.183 - mae: 51.742 - mean_q: 66.716 Interval 986 (492500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3306 1 episodes - episode_reward: 193.806 [193.806, 193.806] - loss: 11.256 - mae: 51.560 - mean_q: 66.582 Interval 987 (493000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4765 1 episodes - episode_reward: 257.360 [257.360, 257.360] - loss: 10.460 - mae: 51.655 - mean_q: 66.541 Interval 988 (493500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: 0.4792 1 episodes - episode_reward: 234.321 [234.321, 234.321] - loss: 9.497 - mae: 51.666 - mean_q: 67.052 Interval 989 (494000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0908 1 episodes - episode_reward: -173.582 [-173.582, -173.582] - loss: 13.118 - mae: 51.839 - mean_q: 66.904 Interval 990 (494500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3314 1 episodes - episode_reward: 260.039 [260.039, 260.039] - loss: 11.076 - mae: 51.867 - mean_q: 67.081 Interval 991 (495000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8545 2 episodes - episode_reward: 231.751 [213.894, 249.609] - loss: 10.865 - mae: 52.184 - mean_q: 67.672 Interval 992 (495500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.6176 4 episodes - episode_reward: -77.778 [-137.573, 16.717] - loss: 9.870 - mae: 52.011 - mean_q: 67.428 Interval 993 (496000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1526 1 episodes - episode_reward: -73.107 [-73.107, -73.107] - loss: 12.169 - mae: 52.717 - mean_q: 68.067 Interval 994 (496500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0012 2 episodes - episode_reward: 98.311 [-69.676, 266.297] - loss: 12.924 - mae: 52.749 - mean_q: 68.098 Interval 995 (497000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4206 2 episodes - episode_reward: 70.426 [-87.871, 228.724] - loss: 12.341 - mae: 52.970 - mean_q: 68.497 Interval 996 (497500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4796 5 episodes - episode_reward: -159.564 [-333.844, -8.927] - loss: 19.024 - mae: 52.934 - mean_q: 68.613 Interval 997 (498000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1676 Interval 998 (498500 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6633 3 episodes - episode_reward: -66.161 [-499.195, 266.995] - loss: 13.830 - mae: 53.284 - mean_q: 69.584 Interval 999 (499000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3047 1 episodes - episode_reward: -158.251 [-158.251, -158.251] - loss: 15.311 - mae: 53.375 - mean_q: 69.315 Interval 1000 (499500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1445 1 episodes - episode_reward: -65.469 [-65.469, -65.469] - loss: 16.084 - mae: 53.076 - mean_q: 69.202 Interval 1001 (500000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1515 2 episodes - episode_reward: 54.147 [-169.394, 277.687] - loss: 16.811 - mae: 53.315 - mean_q: 69.489 Interval 1002 (500500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2800 4 episodes - episode_reward: 7.513 [-133.100, 250.994] - loss: 16.400 - mae: 53.030 - mean_q: 69.274 Interval 1003 (501000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1694 Interval 1004 (501500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1304 Interval 1005 (502000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6638 2 episodes - episode_reward: -286.507 [-461.068, -111.947] - loss: 10.890 - mae: 53.380 - mean_q: 69.577 Interval 1006 (502500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3177 2 episodes - episode_reward: 97.897 [-28.487, 224.281] - loss: 14.408 - mae: 53.440 - mean_q: 69.632 Interval 1007 (503000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3273 2 episodes - episode_reward: 117.026 [-35.499, 269.552] - loss: 17.243 - mae: 52.856 - mean_q: 68.714 Interval 1008 (503500 steps performed) 500/500 [==============================] - 3s 
6ms/step - reward: 0.4922 2 episodes - episode_reward: 88.563 [-36.210, 213.336] - loss: 14.241 - mae: 53.272 - mean_q: 69.137 Interval 1009 (504000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5100 2 episodes - episode_reward: 135.942 [39.290, 232.595] - loss: 10.983 - mae: 53.008 - mean_q: 68.960 Interval 1010 (504500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 5.8351e-04 Interval 1011 (505000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2417 Interval 1012 (505500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1668 Interval 1013 (506000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2021 2 episodes - episode_reward: -183.242 [-285.875, -80.610] - loss: 13.476 - mae: 52.419 - mean_q: 67.909 Interval 1014 (506500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5770 5 episodes - episode_reward: -37.561 [-113.536, 200.035] - loss: 12.096 - mae: 52.234 - mean_q: 67.699 Interval 1015 (507000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3833 2 episodes - episode_reward: -1.436 [-17.614, 14.741] - loss: 17.158 - mae: 52.361 - mean_q: 68.026 Interval 1016 (507500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2088 2 episodes - episode_reward: 119.272 [-26.944, 265.488] - loss: 17.664 - mae: 51.998 - mean_q: 67.304 Interval 1017 (508000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2392 2 episodes - episode_reward: -1.943 [-140.803, 136.918] - loss: 16.667 - mae: 52.459 - mean_q: 67.802 Interval 1018 (508500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6502 1 episodes - episode_reward: 188.020 [188.020, 188.020] - loss: 17.868 - mae: 52.292 - mean_q: 67.753 Interval 1019 (509000 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: -0.2283 Interval 1020 (509500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0202 2 episodes - episode_reward: -53.242 [-122.493, 16.010] - loss: 20.022 - mae: 51.897 - mean_q: 67.417 Interval 1021 (510000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0480 1 episodes - episode_reward: -5.451 [-5.451, -5.451] - loss: 14.141 - mae: 52.055 - mean_q: 67.455 Interval 1022 (510500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1223 1 episodes - episode_reward: 196.753 [196.753, 196.753] - loss: 12.132 - mae: 52.341 - mean_q: 68.142 Interval 1023 (511000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3379 1 episodes - episode_reward: -130.712 [-130.712, -130.712] - loss: 13.062 - mae: 52.178 - mean_q: 67.823 Interval 1024 (511500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0538 Interval 1025 (512000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2461 1 episodes - episode_reward: -328.220 [-328.220, -328.220] - loss: 15.618 - mae: 51.649 - mean_q: 67.272 Interval 1026 (512500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2914 2 episodes - episode_reward: -33.744 [-255.125, 187.638] - loss: 15.672 - mae: 51.493 - mean_q: 67.103 Interval 1027 (513000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6977 2 episodes - episode_reward: -168.915 [-170.276, -167.554] - loss: 13.638 - mae: 51.432 - mean_q: 67.020 Interval 1028 (513500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2180 2 episodes - episode_reward: -14.132 [-61.371, 33.106] - loss: 13.841 - mae: 51.430 - mean_q: 66.857 Interval 1029 (514000 steps performed) 500/500 [==============================] - 3s 5ms/step - 
reward: -0.6357 2 episodes - episode_reward: -102.876 [-115.642, -90.110] - loss: 18.804 - mae: 51.277 - mean_q: 66.680 Interval 1030 (514500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5066 1 episodes - episode_reward: -211.131 [-211.131, -211.131] - loss: 16.953 - mae: 50.888 - mean_q: 66.543 Interval 1031 (515000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1561 2 episodes - episode_reward: -1.120 [-166.412, 164.172] - loss: 12.678 - mae: 50.987 - mean_q: 66.131 Interval 1032 (515500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2315 Interval 1033 (516000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2158 1 episodes - episode_reward: 261.680 [261.680, 261.680] - loss: 15.900 - mae: 50.395 - mean_q: 65.460 Interval 1034 (516500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7562 1 episodes - episode_reward: -345.638 [-345.638, -345.638] - loss: 15.451 - mae: 50.134 - mean_q: 65.046 Interval 1035 (517000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3223 1 episodes - episode_reward: -210.965 [-210.965, -210.965] - loss: 14.147 - mae: 49.823 - mean_q: 65.061 Interval 1036 (517500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1449 1 episodes - episode_reward: -107.448 [-107.448, -107.448] - loss: 13.663 - mae: 49.329 - mean_q: 63.974 Interval 1037 (518000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4767 1 episodes - episode_reward: 149.534 [149.534, 149.534] - loss: 15.485 - mae: 49.178 - mean_q: 63.844 Interval 1038 (518500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3275 2 episodes - episode_reward: -42.095 [-328.179, 243.988] - loss: 15.148 - mae: 48.814 - mean_q: 63.006 Interval 1039 (519000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.2830 3 episodes - episode_reward: -46.898 [-100.000, 7.665] - loss: 11.121 - mae: 49.113 - mean_q: 63.836 Interval 1040 (519500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0869 2 episodes - episode_reward: -14.769 [-182.524, 152.987] - loss: 12.669 - mae: 48.936 - mean_q: 63.387 Interval 1041 (520000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6904 2 episodes - episode_reward: -144.511 [-152.037, -136.986] - loss: 12.880 - mae: 48.890 - mean_q: 62.857 Interval 1042 (520500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0529 1 episodes - episode_reward: -118.337 [-118.337, -118.337] - loss: 17.704 - mae: 49.016 - mean_q: 62.843 Interval 1043 (521000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4322 1 episodes - episode_reward: 187.339 [187.339, 187.339] - loss: 13.357 - mae: 48.901 - mean_q: 62.934 Interval 1044 (521500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2304 1 episodes - episode_reward: 218.029 [218.029, 218.029] - loss: 16.635 - mae: 48.844 - mean_q: 62.562 Interval 1045 (522000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3177 1 episodes - episode_reward: 198.266 [198.266, 198.266] - loss: 13.875 - mae: 48.722 - mean_q: 62.419 Interval 1046 (522500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6646 3 episodes - episode_reward: -148.991 [-174.281, -100.000] - loss: 12.826 - mae: 49.194 - mean_q: 62.589 Interval 1047 (523000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5278 1 episodes - episode_reward: 223.711 [223.711, 223.711] - loss: 14.478 - mae: 49.436 - mean_q: 62.833 Interval 1048 (523500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0797 2 
episodes - episode_reward: 85.487 [-57.642, 228.616] - loss: 16.303 - mae: 49.467 - mean_q: 62.536 Interval 1049 (524000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1282 1 episodes - episode_reward: -39.176 [-39.176, -39.176] - loss: 13.131 - mae: 49.549 - mean_q: 63.183 Interval 1050 (524500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0222 1 episodes - episode_reward: -67.006 [-67.006, -67.006] - loss: 15.363 - mae: 50.284 - mean_q: 63.986 Interval 1051 (525000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3534 1 episodes - episode_reward: 110.270 [110.270, 110.270] - loss: 10.131 - mae: 50.264 - mean_q: 64.516 Interval 1052 (525500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2125 1 episodes - episode_reward: 175.816 [175.816, 175.816] - loss: 12.326 - mae: 50.163 - mean_q: 64.152 Interval 1053 (526000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3345 1 episodes - episode_reward: 185.143 [185.143, 185.143] - loss: 15.278 - mae: 50.423 - mean_q: 64.453 Interval 1054 (526500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2024 Interval 1055 (527000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2110 Interval 1056 (527500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1417 Interval 1057 (528000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0418 Interval 1058 (528500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3363 2 episodes - episode_reward: -212.509 [-321.596, -103.421] - loss: 15.833 - mae: 49.817 - mean_q: 64.164 Interval 1059 (529000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1790 Interval 1060 (529500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: 0.1439 3 episodes - episode_reward: 45.731 [-47.175, 220.378] - loss: 17.084 - mae: 50.020 - mean_q: 64.423 Interval 1061 (530000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3062 1 episodes - episode_reward: 219.662 [219.662, 219.662] - loss: 15.124 - mae: 49.928 - mean_q: 64.486 Interval 1062 (530500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6645 2 episodes - episode_reward: -220.147 [-345.063, -95.231] - loss: 18.229 - mae: 50.118 - mean_q: 64.395 Interval 1063 (531000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3246 3 episodes - episode_reward: -35.734 [-230.159, 248.253] - loss: 11.705 - mae: 50.009 - mean_q: 64.117 Interval 1064 (531500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0760 1 episodes - episode_reward: -30.304 [-30.304, -30.304] - loss: 16.876 - mae: 49.598 - mean_q: 63.580 Interval 1065 (532000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1009 Interval 1066 (532500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0056 3 episodes - episode_reward: 26.973 [-58.876, 182.854] - loss: 14.090 - mae: 49.604 - mean_q: 63.647 Interval 1067 (533000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5719 1 episodes - episode_reward: -246.803 [-246.803, -246.803] - loss: 14.325 - mae: 49.890 - mean_q: 64.603 Interval 1068 (533500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0141 Interval 1069 (534000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7614 2 episodes - episode_reward: -234.214 [-368.428, -100.000] - loss: 12.058 - mae: 49.780 - mean_q: 64.162 Interval 1070 (534500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 
-0.1629 Interval 1071 (535000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0663 Interval 1072 (535500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4727 1 episodes - episode_reward: 27.536 [27.536, 27.536] - loss: 11.296 - mae: 48.774 - mean_q: 63.245 Interval 1073 (536000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1046 Interval 1074 (536500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2177 2 episodes - episode_reward: -18.924 [-226.785, 188.938] - loss: 14.903 - mae: 49.035 - mean_q: 63.249 Interval 1075 (537000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0355 Interval 1076 (537500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0832 Interval 1077 (538000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5321 1 episodes - episode_reward: 101.729 [101.729, 101.729] - loss: 11.432 - mae: 49.823 - mean_q: 63.829 Interval 1078 (538500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6038 1 episodes - episode_reward: 218.228 [218.228, 218.228] - loss: 12.094 - mae: 49.738 - mean_q: 64.148 Interval 1079 (539000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7363 2 episodes - episode_reward: 273.733 [247.472, 299.995] - loss: 16.675 - mae: 49.407 - mean_q: 63.601 Interval 1080 (539500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1067 2 episodes - episode_reward: -22.923 [-72.805, 26.960] - loss: 12.570 - mae: 49.051 - mean_q: 63.199 Interval 1081 (540000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0468 2 episodes - episode_reward: 22.804 [-135.406, 181.015] - loss: 11.530 - mae: 49.060 - mean_q: 63.098 Interval 1082 (540500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.2433 1 episodes - episode_reward: -199.533 [-199.533, -199.533] - loss: 13.986 - mae: 49.368 - mean_q: 63.551 Interval 1083 (541000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3671 3 episodes - episode_reward: 100.388 [-138.872, 270.267] - loss: 13.623 - mae: 49.367 - mean_q: 63.033 Interval 1084 (541500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2632 Interval 1085 (542000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1121 1 episodes - episode_reward: 187.019 [187.019, 187.019] - loss: 12.571 - mae: 48.687 - mean_q: 62.359 Interval 1086 (542500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5522 2 episodes - episode_reward: -145.312 [-176.268, -114.356] - loss: 12.479 - mae: 48.266 - mean_q: 61.656 Interval 1087 (543000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2714 1 episodes - episode_reward: 39.202 [39.202, 39.202] - loss: 11.741 - mae: 48.020 - mean_q: 61.386 Interval 1088 (543500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0961 1 episodes - episode_reward: 213.616 [213.616, 213.616] - loss: 14.402 - mae: 47.973 - mean_q: 60.899 Interval 1089 (544000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3108 1 episodes - episode_reward: 31.621 [31.621, 31.621] - loss: 9.596 - mae: 47.987 - mean_q: 61.275 Interval 1090 (544500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.7255 1 episodes - episode_reward: 342.321 [342.321, 342.321] - loss: 14.422 - mae: 47.825 - mean_q: 60.644 Interval 1091 (545000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0968 1 episodes - episode_reward: -11.152 [-11.152, -11.152] - loss: 12.514 - mae: 47.708 - mean_q: 60.806 Interval 1092 
(545500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0072 Interval 1093 (546000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2326 1 episodes - episode_reward: 139.138 [139.138, 139.138] - loss: 13.285 - mae: 47.297 - mean_q: 60.442 Interval 1094 (546500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0743 1 episodes - episode_reward: -19.432 [-19.432, -19.432] - loss: 12.610 - mae: 47.712 - mean_q: 60.868 Interval 1095 (547000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0896 Interval 1096 (547500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3489 1 episodes - episode_reward: 111.775 [111.775, 111.775] - loss: 12.536 - mae: 47.296 - mean_q: 60.553 Interval 1097 (548000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1890 3 episodes - episode_reward: 22.554 [-95.124, 174.749] - loss: 14.768 - mae: 46.953 - mean_q: 60.382 Interval 1098 (548500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7139 2 episodes - episode_reward: -128.742 [-263.925, 6.442] - loss: 12.269 - mae: 46.650 - mean_q: 59.841 Interval 1099 (549000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1148 Interval 1100 (549500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2625 2 episodes - episode_reward: -104.515 [-127.935, -81.095] - loss: 13.610 - mae: 45.732 - mean_q: 58.328 Interval 1101 (550000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6701 2 episodes - episode_reward: -161.290 [-222.528, -100.052] - loss: 11.658 - mae: 45.397 - mean_q: 57.821 Interval 1102 (550500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2824 1 episodes - episode_reward: -135.925 [-135.925, -135.925] - loss: 
16.320 - mae: 44.983 - mean_q: 57.662 Interval 1103 (551000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7122 1 episodes - episode_reward: 205.877 [205.877, 205.877] - loss: 12.565 - mae: 44.624 - mean_q: 56.878 Interval 1104 (551500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3221 3 episodes - episode_reward: 73.047 [-50.378, 241.302] - loss: 13.987 - mae: 44.386 - mean_q: 56.700 Interval 1105 (552000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0291 2 episodes - episode_reward: 53.241 [-116.319, 222.802] - loss: 13.256 - mae: 43.898 - mean_q: 56.121 Interval 1106 (552500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1131 1 episodes - episode_reward: -76.895 [-76.895, -76.895] - loss: 13.908 - mae: 43.659 - mean_q: 55.272 Interval 1107 (553000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2603 Interval 1108 (553500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3034 Interval 1109 (554000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3506 2 episodes - episode_reward: -65.194 [-418.829, 288.441] - loss: 16.773 - mae: 42.733 - mean_q: 54.098 Interval 1110 (554500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4065 1 episodes - episode_reward: 238.845 [238.845, 238.845] - loss: 12.498 - mae: 42.269 - mean_q: 54.111 Interval 1111 (555000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4038 1 episodes - episode_reward: 242.352 [242.352, 242.352] - loss: 15.279 - mae: 41.988 - mean_q: 53.838 Interval 1112 (555500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1332 Interval 1113 (556000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0394 1 episodes - 
episode_reward: -208.535 [-208.535, -208.535] - loss: 11.689 - mae: 41.607 - mean_q: 53.229 Interval 1114 (556500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3732 1 episodes - episode_reward: 224.911 [224.911, 224.911] - loss: 15.905 - mae: 41.317 - mean_q: 53.050 Interval 1115 (557000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0200 Interval 1116 (557500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0673 Interval 1117 (558000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2310 1 episodes - episode_reward: 186.970 [186.970, 186.970] - loss: 13.048 - mae: 40.940 - mean_q: 52.363 Interval 1118 (558500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3010 1 episodes - episode_reward: 168.596 [168.596, 168.596] - loss: 10.215 - mae: 40.634 - mean_q: 51.644 Interval 1119 (559000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2623 2 episodes - episode_reward: -61.095 [-99.155, -23.034] - loss: 12.485 - mae: 40.369 - mean_q: 51.898 Interval 1120 (559500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2238 Interval 1121 (560000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1810 1 episodes - episode_reward: -145.959 [-145.959, -145.959] - loss: 13.334 - mae: 39.271 - mean_q: 50.119 Interval 1122 (560500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4352 3 episodes - episode_reward: -100.201 [-343.576, 235.927] - loss: 11.913 - mae: 38.977 - mean_q: 49.735 Interval 1123 (561000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1001 Interval 1124 (561500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2206 1 episodes - episode_reward: -148.577 [-148.577, -148.577] - 
loss: 12.765 - mae: 38.811 - mean_q: 49.112 Interval 1125 (562000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3306 2 episodes - episode_reward: 12.481 [-138.531, 163.494] - loss: 11.555 - mae: 39.187 - mean_q: 49.409 Interval 1126 (562500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1581 1 episodes - episode_reward: 278.120 [278.120, 278.120] - loss: 11.185 - mae: 38.753 - mean_q: 48.668 Interval 1127 (563000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7144 3 episodes - episode_reward: -147.523 [-191.711, -101.402] - loss: 12.916 - mae: 38.641 - mean_q: 48.972 Interval 1128 (563500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0808 Interval 1129 (564000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0465 Interval 1130 (564500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0959 1 episodes - episode_reward: -125.554 [-125.554, -125.554] - loss: 14.885 - mae: 38.829 - mean_q: 49.055 Interval 1131 (565000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0290 Interval 1132 (565500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1423 2 episodes - episode_reward: 8.202 [-142.101, 158.504] - loss: 10.910 - mae: 39.286 - mean_q: 49.649 Interval 1133 (566000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0024 2 episodes - episode_reward: -14.369 [-150.343, 121.605] - loss: 11.538 - mae: 39.424 - mean_q: 49.640 Interval 1134 (566500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3564 2 episodes - episode_reward: -77.945 [-280.830, 124.941] - loss: 10.765 - mae: 39.239 - mean_q: 49.408 Interval 1135 (567000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3899 2 
episodes - episode_reward: -182.307 [-241.252, -123.363] - loss: 14.662 - mae: 39.460 - mean_q: 50.206 Interval 1136 (567500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3019 3 episodes - episode_reward: 92.854 [-194.396, 247.564] - loss: 12.207 - mae: 39.625 - mean_q: 50.474 Interval 1137 (568000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1708 2 episodes - episode_reward: 52.265 [-141.903, 246.433] - loss: 11.435 - mae: 40.214 - mean_q: 50.985 Interval 1138 (568500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4629 2 episodes - episode_reward: 127.293 [-9.722, 264.309] - loss: 15.582 - mae: 40.197 - mean_q: 51.285 Interval 1139 (569000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2344 2 episodes - episode_reward: -58.207 [-195.711, 79.297] - loss: 11.320 - mae: 40.196 - mean_q: 50.895 Interval 1140 (569500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3508 1 episodes - episode_reward: 311.618 [311.618, 311.618] - loss: 14.974 - mae: 40.233 - mean_q: 51.031 Interval 1141 (570000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1141 Interval 1142 (570500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6013 3 episodes - episode_reward: -40.694 [-204.703, 242.966] - loss: 12.456 - mae: 41.168 - mean_q: 52.486 Interval 1143 (571000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9973 3 episodes - episode_reward: -183.572 [-251.379, -90.962] - loss: 12.445 - mae: 41.115 - mean_q: 52.305 Interval 1144 (571500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2076 1 episodes - episode_reward: -88.943 [-88.943, -88.943] - loss: 12.453 - mae: 41.036 - mean_q: 52.303 Interval 1145 (572000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.0247 Interval 1146 (572500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0749 3 episodes - episode_reward: 2.144 [-334.141, 217.864] - loss: 14.850 - mae: 41.928 - mean_q: 53.461 Interval 1147 (573000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1000 Interval 1148 (573500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0203 Interval 1149 (574000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0986 1 episodes - episode_reward: 149.990 [149.990, 149.990] - loss: 13.915 - mae: 42.010 - mean_q: 53.306 Interval 1150 (574500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1586 Interval 1151 (575000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2265 Interval 1152 (575500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0254 4 episodes - episode_reward: -188.650 [-330.818, -79.908] - loss: 11.733 - mae: 41.993 - mean_q: 53.796 Interval 1153 (576000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0558 Interval 1154 (576500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0015 Interval 1155 (577000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0642 1 episodes - episode_reward: 75.320 [75.320, 75.320] - loss: 9.878 - mae: 42.346 - mean_q: 54.374 Interval 1156 (577500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3958 1 episodes - episode_reward: 183.920 [183.920, 183.920] - loss: 11.721 - mae: 42.782 - mean_q: 54.389 Interval 1157 (578000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5595 3 episodes - episode_reward: -132.067 [-253.167, -16.016] - loss: 12.866 - mae: 42.494 - 
mean_q: 53.790 Interval 1158 (578500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4922 1 episodes - episode_reward: -219.745 [-219.745, -219.745] - loss: 11.829 - mae: 42.714 - mean_q: 54.270 Interval 1159 (579000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1193 Interval 1160 (579500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1590 2 episodes - episode_reward: -88.260 [-257.335, 80.815] - loss: 13.317 - mae: 42.988 - mean_q: 54.162 Interval 1161 (580000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0483 Interval 1162 (580500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0291 Interval 1163 (581000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0428 Interval 1164 (581500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0860 Interval 1165 (582000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6527 1 episodes - episode_reward: -310.355 [-310.355, -310.355] - loss: 13.023 - mae: 42.129 - mean_q: 53.624 Interval 1166 (582500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2334 Interval 1167 (583000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3041 1 episodes - episode_reward: 161.825 [161.825, 161.825] - loss: 9.882 - mae: 42.140 - mean_q: 54.030 Interval 1168 (583500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1635 1 episodes - episode_reward: 125.284 [125.284, 125.284] - loss: 10.676 - mae: 42.644 - mean_q: 54.998 Interval 1169 (584000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6696 4 episodes - episode_reward: -107.969 [-141.666, -65.797] - loss: 12.330 - mae: 42.608 - mean_q: 54.809 Interval 1170 (584500 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0394 Interval 1171 (585000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6716 2 episodes - episode_reward: 196.902 [196.458, 197.346] - loss: 9.221 - mae: 43.131 - mean_q: 55.231 Interval 1172 (585500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0349 Interval 1173 (586000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2480 1 episodes - episode_reward: 138.704 [138.704, 138.704] - loss: 10.998 - mae: 43.024 - mean_q: 55.177 Interval 1174 (586500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0280 Interval 1175 (587000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0036 1 episodes - episode_reward: -78.370 [-78.370, -78.370] - loss: 10.879 - mae: 42.821 - mean_q: 54.712 Interval 1176 (587500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2556 1 episodes - episode_reward: 191.504 [191.504, 191.504] - loss: 10.651 - mae: 43.062 - mean_q: 55.161 Interval 1177 (588000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0576 Interval 1178 (588500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3076 1 episodes - episode_reward: 130.641 [130.641, 130.641] - loss: 12.047 - mae: 42.808 - mean_q: 54.691 Interval 1179 (589000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1919 Interval 1180 (589500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1561 Interval 1181 (590000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2921 1 episodes - episode_reward: -48.235 [-48.235, -48.235] - loss: 11.251 - mae: 42.421 - mean_q: 54.130 Interval 1182 (590500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.0481 Interval 1183 (591000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2337 1 episodes - episode_reward: 169.300 [169.300, 169.300] - loss: 10.187 - mae: 42.687 - mean_q: 54.274 Interval 1184 (591500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2203 Interval 1185 (592000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0995 2 episodes - episode_reward: -46.404 [-100.702, 7.895] - loss: 9.626 - mae: 42.457 - mean_q: 53.538 Interval 1186 (592500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0635 1 episodes - episode_reward: -18.927 [-18.927, -18.927] - loss: 9.622 - mae: 42.223 - mean_q: 53.637 Interval 1187 (593000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5952 3 episodes - episode_reward: -259.970 [-533.458, -93.381] - loss: 12.688 - mae: 41.800 - mean_q: 52.398 Interval 1188 (593500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5956 2 episodes - episode_reward: -92.106 [-143.202, -41.009] - loss: 11.139 - mae: 41.882 - mean_q: 51.850 Interval 1189 (594000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2945 Interval 1190 (594500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1700 Interval 1191 (595000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1304 2 episodes - episode_reward: -31.992 [-140.867, 76.883] - loss: 10.140 - mae: 40.730 - mean_q: 50.751 Interval 1192 (595500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0341 1 episodes - episode_reward: -113.748 [-113.748, -113.748] - loss: 12.481 - mae: 40.298 - mean_q: 50.023 Interval 1193 (596000 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -0.0111 Interval 1194 (596500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7481 2 episodes - episode_reward: 232.483 [163.438, 301.528] - loss: 8.299 - mae: 39.951 - mean_q: 49.879 Interval 1195 (597000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1806 1 episodes - episode_reward: -211.824 [-211.824, -211.824] - loss: 9.399 - mae: 39.858 - mean_q: 49.488 Interval 1196 (597500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0824 Interval 1197 (598000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3583 1 episodes - episode_reward: 136.003 [136.003, 136.003] - loss: 7.825 - mae: 40.022 - mean_q: 49.621 Interval 1198 (598500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0804 1 episodes - episode_reward: 25.031 [25.031, 25.031] - loss: 11.452 - mae: 40.271 - mean_q: 49.693 Interval 1199 (599000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0017 Interval 1200 (599500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2433 3 episodes - episode_reward: -22.568 [-173.662, 205.959] - loss: 9.152 - mae: 39.758 - mean_q: 49.641 Interval 1201 (600000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0162 2 episodes - episode_reward: 15.332 [-100.000, 130.664] - loss: 7.978 - mae: 39.667 - mean_q: 49.473 Interval 1202 (600500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4200 1 episodes - episode_reward: -189.904 [-189.904, -189.904] - loss: 8.707 - mae: 39.701 - mean_q: 49.810 Interval 1203 (601000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1668 1 episodes - episode_reward: 86.557 [86.557, 86.557] - loss: 10.035 - mae: 39.832 - mean_q: 49.839 Interval 1204 (601500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.4151 1 episodes - episode_reward: 200.109 [200.109, 200.109] - loss: 10.051 - mae: 40.120 - mean_q: 50.482 Interval 1205 (602000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2054 1 episodes - episode_reward: 53.694 [53.694, 53.694] - loss: 10.148 - mae: 40.173 - mean_q: 50.412 Interval 1206 (602500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2849 1 episodes - episode_reward: 125.961 [125.961, 125.961] - loss: 10.630 - mae: 40.543 - mean_q: 50.849 Interval 1207 (603000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4645 1 episodes - episode_reward: -276.309 [-276.309, -276.309] - loss: 8.171 - mae: 40.290 - mean_q: 50.503 Interval 1208 (603500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2801 2 episodes - episode_reward: -14.150 [-248.573, 220.274] - loss: 10.687 - mae: 40.070 - mean_q: 50.157 Interval 1209 (604000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2814 2 episodes - episode_reward: -131.591 [-156.596, -106.585] - loss: 10.602 - mae: 40.384 - mean_q: 50.076 Interval 1210 (604500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7058 1 episodes - episode_reward: -264.457 [-264.457, -264.457] - loss: 11.044 - mae: 40.248 - mean_q: 49.955 Interval 1211 (605000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1470 3 episodes - episode_reward: -16.341 [-134.532, 189.603] - loss: 13.083 - mae: 40.518 - mean_q: 50.527 Interval 1212 (605500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0406 2 episodes - episode_reward: 71.972 [-101.726, 245.669] - loss: 9.851 - mae: 40.015 - mean_q: 50.244 Interval 1213 (606000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0228 
Interval 1214 (606500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0019 Interval 1215 (607000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2024 1 episodes - episode_reward: 98.861 [98.861, 98.861] - loss: 9.577 - mae: 40.659 - mean_q: 50.812 Interval 1216 (607500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4349 1 episodes - episode_reward: 213.251 [213.251, 213.251] - loss: 10.582 - mae: 41.070 - mean_q: 51.613 Interval 1217 (608000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7119 1 episodes - episode_reward: -337.052 [-337.052, -337.052] - loss: 11.869 - mae: 41.238 - mean_q: 51.735 Interval 1218 (608500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3409 Interval 1219 (609000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4519 1 episodes - episode_reward: 234.081 [234.081, 234.081] - loss: 12.969 - mae: 41.370 - mean_q: 51.485 Interval 1220 (609500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1921 1 episodes - episode_reward: 195.246 [195.246, 195.246] - loss: 10.019 - mae: 41.435 - mean_q: 51.869 Interval 1221 (610000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -8.3313e-04 2 episodes - episode_reward: 38.563 [-92.061, 169.187] - loss: 8.581 - mae: 41.798 - mean_q: 52.183 Interval 1222 (610500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0312 1 episodes - episode_reward: -117.758 [-117.758, -117.758] - loss: 9.941 - mae: 41.634 - mean_q: 52.249 Interval 1223 (611000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0062 2 episodes - episode_reward: 6.454 [3.903, 9.006] - loss: 9.226 - mae: 41.577 - mean_q: 52.201 Interval 1224 (611500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.2464 3 episodes - episode_reward: -3.573 [-207.230, 191.636] - loss: 11.810 - mae: 41.907 - mean_q: 52.808 Interval 1225 (612000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3860 1 episodes - episode_reward: 226.842 [226.842, 226.842] - loss: 11.475 - mae: 41.711 - mean_q: 52.630 Interval 1226 (612500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1873 3 episodes - episode_reward: -89.468 [-160.123, 3.354] - loss: 11.598 - mae: 41.657 - mean_q: 52.134 Interval 1227 (613000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4760 1 episodes - episode_reward: 215.454 [215.454, 215.454] - loss: 12.208 - mae: 41.387 - mean_q: 52.087 Interval 1228 (613500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1875 Interval 1229 (614000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2887 1 episodes - episode_reward: 133.623 [133.623, 133.623] - loss: 12.032 - mae: 41.560 - mean_q: 52.458 Interval 1230 (614500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2700 Interval 1231 (615000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2549 1 episodes - episode_reward: 284.207 [284.207, 284.207] - loss: 10.568 - mae: 41.700 - mean_q: 52.603 Interval 1232 (615500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8159 4 episodes - episode_reward: -98.877 [-129.514, -52.776] - loss: 10.031 - mae: 41.503 - mean_q: 52.280 Interval 1233 (616000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0297 1 episodes - episode_reward: -12.443 [-12.443, -12.443] - loss: 11.034 - mae: 41.432 - mean_q: 52.014 Interval 1234 (616500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3168 1 
episodes - episode_reward: -36.300 [-36.300, -36.300] - loss: 8.821 - mae: 41.050 - mean_q: 51.451 Interval 1235 (617000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1702 4 episodes - episode_reward: -177.331 [-263.546, -110.245] - loss: 12.905 - mae: 40.881 - mean_q: 50.950 Interval 1236 (617500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3074 1 episodes - episode_reward: 187.934 [187.934, 187.934] - loss: 11.341 - mae: 40.798 - mean_q: 51.034 Interval 1237 (618000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.3892e-05 Interval 1238 (618500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2559 1 episodes - episode_reward: -162.473 [-162.473, -162.473] - loss: 11.095 - mae: 40.692 - mean_q: 50.823 Interval 1239 (619000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0759 Interval 1240 (619500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1107 2 episodes - episode_reward: -25.415 [-181.274, 130.444] - loss: 8.861 - mae: 40.906 - mean_q: 50.733 Interval 1241 (620000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0203 Interval 1242 (620500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3611 1 episodes - episode_reward: 125.369 [125.369, 125.369] - loss: 10.577 - mae: 41.002 - mean_q: 51.090 Interval 1243 (621000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0421 Interval 1244 (621500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2858 1 episodes - episode_reward: 158.071 [158.071, 158.071] - loss: 14.232 - mae: 40.843 - mean_q: 51.407 Interval 1245 (622000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3261 1 episodes - episode_reward: 193.722 [193.722, 
193.722] - loss: 11.317 - mae: 40.740 - mean_q: 51.101 Interval 1246 (622500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1002 2 episodes - episode_reward: 4.794 [-5.737, 15.325] - loss: 13.648 - mae: 41.154 - mean_q: 51.660 Interval 1247 (623000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1413 Interval 1248 (623500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1471 1 episodes - episode_reward: 199.994 [199.994, 199.994] - loss: 11.336 - mae: 42.273 - mean_q: 53.284 Interval 1249 (624000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0779 Interval 1250 (624500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3325 1 episodes - episode_reward: 133.742 [133.742, 133.742] - loss: 10.977 - mae: 42.893 - mean_q: 53.407 Interval 1251 (625000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1186 2 episodes - episode_reward: 32.889 [-100.081, 165.860] - loss: 12.080 - mae: 42.920 - mean_q: 54.002 Interval 1252 (625500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8851 2 episodes - episode_reward: 256.889 [255.848, 257.930] - loss: 10.803 - mae: 42.780 - mean_q: 53.912 Interval 1253 (626000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2868 2 episodes - episode_reward: -71.620 [-149.239, 5.998] - loss: 14.606 - mae: 42.088 - mean_q: 53.085 Interval 1254 (626500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8870 2 episodes - episode_reward: -230.068 [-248.272, -211.865] - loss: 12.209 - mae: 41.862 - mean_q: 52.859 Interval 1255 (627000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3636 2 episodes - episode_reward: 73.211 [-104.506, 250.927] - loss: 11.181 - mae: 41.900 - mean_q: 52.716 Interval 1256 
(627500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8248 2 episodes - episode_reward: 227.025 [161.856, 292.193] - loss: 9.367 - mae: 41.896 - mean_q: 52.605 Interval 1257 (628000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3172 Interval 1258 (628500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3832 1 episodes - episode_reward: 261.672 [261.672, 261.672] - loss: 9.669 - mae: 41.704 - mean_q: 52.488 Interval 1259 (629000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6090 4 episodes - episode_reward: -27.641 [-166.056, 193.257] - loss: 11.756 - mae: 41.601 - mean_q: 52.397 Interval 1260 (629500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4306 2 episodes - episode_reward: -148.090 [-259.608, -36.572] - loss: 11.463 - mae: 41.539 - mean_q: 52.709 Interval 1261 (630000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4668 2 episodes - episode_reward: -164.649 [-246.982, -82.315] - loss: 12.467 - mae: 42.063 - mean_q: 52.950 Interval 1262 (630500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1894 Interval 1263 (631000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2775 1 episodes - episode_reward: 263.378 [263.378, 263.378] - loss: 11.107 - mae: 42.081 - mean_q: 53.605 Interval 1264 (631500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3107 1 episodes - episode_reward: 188.067 [188.067, 188.067] - loss: 8.117 - mae: 41.890 - mean_q: 53.359 Interval 1265 (632000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3818 1 episodes - episode_reward: 166.870 [166.870, 166.870] - loss: 16.932 - mae: 41.825 - mean_q: 52.806 Interval 1266 (632500 steps performed) 500/500 [==============================] - 
3s 5ms/step - reward: -1.6408 4 episodes - episode_reward: -193.291 [-214.805, -163.168] - loss: 11.236 - mae: 42.096 - mean_q: 53.187 Interval 1267 (633000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1954 2 episodes - episode_reward: 35.962 [-103.111, 175.034] - loss: 11.896 - mae: 42.398 - mean_q: 52.850 Interval 1268 (633500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3617 2 episodes - episode_reward: -183.783 [-200.515, -167.050] - loss: 12.889 - mae: 42.704 - mean_q: 53.314 Interval 1269 (634000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8091 2 episodes - episode_reward: 290.601 [284.523, 296.678] - loss: 11.331 - mae: 42.410 - mean_q: 52.921 Interval 1270 (634500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4218 2 episodes - episode_reward: 95.623 [-100.000, 291.246] - loss: 12.460 - mae: 42.553 - mean_q: 53.150 Interval 1271 (635000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3544 2 episodes - episode_reward: 43.302 [-139.343, 225.946] - loss: 12.198 - mae: 43.002 - mean_q: 53.878 Interval 1272 (635500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1620 3 episodes - episode_reward: -274.299 [-410.356, -85.067] - loss: 11.050 - mae: 43.164 - mean_q: 53.765 Interval 1273 (636000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3115 1 episodes - episode_reward: 213.728 [213.728, 213.728] - loss: 13.403 - mae: 43.293 - mean_q: 54.199 Interval 1274 (636500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3488 2 episodes - episode_reward: -83.479 [-347.664, 180.705] - loss: 10.794 - mae: 43.957 - mean_q: 54.682 Interval 1275 (637000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5775 5 episodes - episode_reward: -179.926 
[-257.992, -116.900] - loss: 20.313 - mae: 43.798 - mean_q: 53.781 Interval 1276 (637500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3909 1 episodes - episode_reward: 216.921 [216.921, 216.921] - loss: 11.586 - mae: 43.541 - mean_q: 54.022 Interval 1277 (638000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3207 2 episodes - episode_reward: 81.484 [-16.674, 179.641] - loss: 14.804 - mae: 44.345 - mean_q: 55.095 Interval 1278 (638500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1334 2 episodes - episode_reward: 16.005 [-170.933, 202.943] - loss: 12.580 - mae: 44.461 - mean_q: 54.926 Interval 1279 (639000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0498 Interval 1280 (639500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2066 1 episodes - episode_reward: 142.714 [142.714, 142.714] - loss: 14.345 - mae: 44.825 - mean_q: 55.350 Interval 1281 (640000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4555 1 episodes - episode_reward: 218.922 [218.922, 218.922] - loss: 11.522 - mae: 45.090 - mean_q: 55.428 Interval 1282 (640500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4423 1 episodes - episode_reward: 183.314 [183.314, 183.314] - loss: 12.019 - mae: 45.237 - mean_q: 55.563 Interval 1283 (641000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0126 3 episodes - episode_reward: -2.521 [-125.158, 225.240] - loss: 12.741 - mae: 45.471 - mean_q: 55.823 Interval 1284 (641500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6540 1 episodes - episode_reward: 264.123 [264.123, 264.123] - loss: 9.026 - mae: 46.181 - mean_q: 56.465 Interval 1285 (642000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5950 5 
episodes - episode_reward: -36.774 [-175.160, 249.271] - loss: 12.586 - mae: 46.480 - mean_q: 57.015 Interval 1286 (642500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6552 1 episodes - episode_reward: 221.753 [221.753, 221.753] - loss: 14.680 - mae: 46.223 - mean_q: 57.161 Interval 1287 (643000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1219 3 episodes - episode_reward: 5.420 [-124.523, 239.064] - loss: 13.240 - mae: 46.444 - mean_q: 57.148 Interval 1288 (643500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3960 1 episodes - episode_reward: 256.863 [256.863, 256.863] - loss: 12.712 - mae: 46.389 - mean_q: 57.182 Interval 1289 (644000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1190 4 episodes - episode_reward: -107.411 [-401.864, 258.597] - loss: 15.402 - mae: 46.678 - mean_q: 57.148 Interval 1290 (644500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3157 2 episodes - episode_reward: 68.471 [-117.739, 254.682] - loss: 12.757 - mae: 46.596 - mean_q: 56.734 Interval 1291 (645000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2641 3 episodes - episode_reward: -191.449 [-378.835, -92.784] - loss: 15.176 - mae: 46.871 - mean_q: 57.738 Interval 1292 (645500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4549 1 episodes - episode_reward: 148.498 [148.498, 148.498] - loss: 12.164 - mae: 46.854 - mean_q: 57.922 Interval 1293 (646000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7431 7 episodes - episode_reward: -54.845 [-118.932, 179.070] - loss: 14.549 - mae: 46.727 - mean_q: 57.060 Interval 1294 (646500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4395 1 episodes - episode_reward: 290.149 [290.149, 290.149] - loss: 15.529 - 
mae: 46.644 - mean_q: 57.011 Interval 1295 (647000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5073 1 episodes - episode_reward: 182.897 [182.897, 182.897] - loss: 13.355 - mae: 47.147 - mean_q: 57.577 Interval 1296 (647500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0731 1 episodes - episode_reward: -16.417 [-16.417, -16.417] - loss: 13.394 - mae: 47.691 - mean_q: 58.261 Interval 1297 (648000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1975 1 episodes - episode_reward: 233.233 [233.233, 233.233] - loss: 14.973 - mae: 47.699 - mean_q: 58.283 Interval 1298 (648500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2913 Interval 1299 (649000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1675 4 episodes - episode_reward: 2.320 [-122.960, 191.730] - loss: 14.253 - mae: 47.834 - mean_q: 59.235 Interval 1300 (649500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4919 2 episodes - episode_reward: -175.730 [-265.081, -86.378] - loss: 14.763 - mae: 48.744 - mean_q: 59.807 Interval 1301 (650000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3693 3 episodes - episode_reward: -18.175 [-248.470, 240.024] - loss: 13.328 - mae: 48.685 - mean_q: 60.083 Interval 1302 (650500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4478 1 episodes - episode_reward: 190.130 [190.130, 190.130] - loss: 14.871 - mae: 48.783 - mean_q: 60.082 Interval 1303 (651000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3205 1 episodes - episode_reward: 145.393 [145.393, 145.393] - loss: 13.953 - mae: 49.125 - mean_q: 60.712 Interval 1304 (651500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1550 1 episodes - episode_reward: -42.616 
[-42.616, -42.616] - loss: 12.753 - mae: 49.000 - mean_q: 60.574 Interval 1305 (652000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3021 1 episodes - episode_reward: 231.905 [231.905, 231.905] - loss: 14.304 - mae: 49.096 - mean_q: 60.251 Interval 1306 (652500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5605 1 episodes - episode_reward: -239.179 [-239.179, -239.179] - loss: 15.463 - mae: 49.035 - mean_q: 60.845 Interval 1307 (653000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4537 1 episodes - episode_reward: 241.090 [241.090, 241.090] - loss: 14.657 - mae: 49.387 - mean_q: 60.872 Interval 1308 (653500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4556 1 episodes - episode_reward: 266.604 [266.604, 266.604] - loss: 11.851 - mae: 49.626 - mean_q: 61.618 Interval 1309 (654000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6153 1 episodes - episode_reward: -349.709 [-349.709, -349.709] - loss: 17.142 - mae: 49.071 - mean_q: 60.688 Interval 1310 (654500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0056 Interval 1311 (655000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3230 1 episodes - episode_reward: 129.953 [129.953, 129.953] - loss: 14.579 - mae: 48.812 - mean_q: 60.609 Interval 1312 (655500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5430 1 episodes - episode_reward: -192.639 [-192.639, -192.639] - loss: 13.627 - mae: 48.955 - mean_q: 60.582 Interval 1313 (656000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8656 2 episodes - episode_reward: -220.282 [-244.150, -196.414] - loss: 13.007 - mae: 48.492 - mean_q: 59.827 Interval 1314 (656500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 
0.3028 Interval 1315 (657000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0240 2 episodes - episode_reward: 75.928 [-72.700, 224.556] - loss: 18.100 - mae: 48.964 - mean_q: 61.084 Interval 1316 (657500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6450 3 episodes - episode_reward: -266.616 [-351.693, -222.914] - loss: 17.559 - mae: 49.615 - mean_q: 61.005 Interval 1317 (658000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0603 Interval 1318 (658500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3688 1 episodes - episode_reward: 186.028 [186.028, 186.028] - loss: 17.647 - mae: 49.512 - mean_q: 60.729 Interval 1319 (659000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1680 1 episodes - episode_reward: -130.000 [-130.000, -130.000] - loss: 16.030 - mae: 49.313 - mean_q: 60.242 Interval 1320 (659500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0189 Interval 1321 (660000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.6515 2 episodes - episode_reward: -128.376 [-153.804, -102.948] - loss: 15.175 - mae: 48.805 - mean_q: 59.568 Interval 1322 (660500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3553 1 episodes - episode_reward: 154.680 [154.680, 154.680] - loss: 16.482 - mae: 48.673 - mean_q: 59.195 Interval 1323 (661000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3985 Interval 1324 (661500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8593 6 episodes - episode_reward: -39.745 [-360.357, 299.202] - loss: 14.237 - mae: 48.032 - mean_q: 59.208 Interval 1325 (662000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3904 1 episodes - episode_reward: -77.701 
[-77.701, -77.701] - loss: 12.311 - mae: 47.831 - mean_q: 58.588 Interval 1326 (662500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2459 2 episodes - episode_reward: 4.964 [-207.654, 217.583] - loss: 13.897 - mae: 47.689 - mean_q: 58.045 Interval 1327 (663000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1469 Interval 1328 (663500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0435 Interval 1329 (664000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3013 1 episodes - episode_reward: 43.069 [43.069, 43.069] - loss: 13.845 - mae: 47.935 - mean_q: 57.904 Interval 1330 (664500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2435 Interval 1331 (665000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7015 4 episodes - episode_reward: -26.926 [-210.241, 278.287] - loss: 14.635 - mae: 47.512 - mean_q: 57.319 Interval 1332 (665500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1962 1 episodes - episode_reward: -161.710 [-161.710, -161.710] - loss: 12.786 - mae: 47.562 - mean_q: 57.628 Interval 1333 (666000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0497 1 episodes - episode_reward: -147.819 [-147.819, -147.819] - loss: 15.272 - mae: 47.276 - mean_q: 57.074 Interval 1334 (666500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2813 1 episodes - episode_reward: 213.667 [213.667, 213.667] - loss: 16.576 - mae: 47.780 - mean_q: 57.265 Interval 1335 (667000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1685 1 episodes - episode_reward: 132.447 [132.447, 132.447] - loss: 12.755 - mae: 47.692 - mean_q: 57.342 Interval 1336 (667500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 
0.3830 1 episodes - episode_reward: 237.384 [237.384, 237.384] - loss: 11.380 - mae: 47.703 - mean_q: 57.128 Interval 1337 (668000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1879 1 episodes - episode_reward: -198.279 [-198.279, -198.279] - loss: 11.802 - mae: 48.235 - mean_q: 57.751 Interval 1338 (668500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0572 2 episodes - episode_reward: 48.471 [-100.000, 196.942] - loss: 14.190 - mae: 48.162 - mean_q: 57.201 Interval 1339 (669000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1474 1 episodes - episode_reward: -169.695 [-169.695, -169.695] - loss: 15.107 - mae: 48.676 - mean_q: 58.212 Interval 1340 (669500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2583 1 episodes - episode_reward: 271.366 [271.366, 271.366] - loss: 13.636 - mae: 47.952 - mean_q: 58.376 Interval 1341 (670000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4639 1 episodes - episode_reward: 204.690 [204.690, 204.690] - loss: 13.259 - mae: 48.400 - mean_q: 58.805 Interval 1342 (670500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3104 3 episodes - episode_reward: -50.630 [-212.137, 172.816] - loss: 13.716 - mae: 48.113 - mean_q: 58.112 Interval 1343 (671000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7867 3 episodes - episode_reward: -123.754 [-191.674, -54.353] - loss: 16.129 - mae: 48.374 - mean_q: 59.218 Interval 1344 (671500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4750 1 episodes - episode_reward: 230.674 [230.674, 230.674] - loss: 16.014 - mae: 48.444 - mean_q: 59.028 Interval 1345 (672000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2785 2 episodes - episode_reward: -40.313 [-313.027, 232.402] - 
loss: 13.445 - mae: 48.510 - mean_q: 59.423 Interval 1346 (672500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1637 Interval 1347 (673000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2489 1 episodes - episode_reward: 197.374 [197.374, 197.374] - loss: 13.293 - mae: 49.250 - mean_q: 60.190 Interval 1348 (673500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2511 Interval 1349 (674000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2749 4 episodes - episode_reward: -4.638 [-240.120, 228.242] - loss: 13.642 - mae: 49.353 - mean_q: 59.764 Interval 1350 (674500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0821 Interval 1351 (675000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0234 3 episodes - episode_reward: 8.567 [-122.526, 248.228] - loss: 15.872 - mae: 49.703 - mean_q: 59.487 Interval 1352 (675500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0158 3 episodes - episode_reward: -18.292 [-179.573, 267.259] - loss: 11.942 - mae: 49.636 - mean_q: 59.834 Interval 1353 (676000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2724 Interval 1354 (676500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1601 2 episodes - episode_reward: 60.632 [-110.969, 232.232] - loss: 14.295 - mae: 49.818 - mean_q: 59.824 Interval 1355 (677000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6630 Interval 1356 (677500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6300 2 episodes - episode_reward: -576.459 [-583.041, -569.876] - loss: 14.409 - mae: 49.920 - mean_q: 59.961 Interval 1357 (678000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9013 3 
episodes - episode_reward: -184.617 [-254.367, -133.571] - loss: 15.313 - mae: 50.253 - mean_q: 60.125 Interval 1358 (678500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2504 1 episodes - episode_reward: 155.616 [155.616, 155.616] - loss: 14.701 - mae: 50.544 - mean_q: 60.786 Interval 1359 (679000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1943 Interval 1360 (679500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1553 2 episodes - episode_reward: 51.664 [-96.057, 199.385] - loss: 11.451 - mae: 50.807 - mean_q: 60.760 Interval 1361 (680000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1371 1 episodes - episode_reward: -16.997 [-16.997, -16.997] - loss: 15.256 - mae: 51.431 - mean_q: 61.549 Interval 1362 (680500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2795 2 episodes - episode_reward: 143.403 [22.605, 264.201] - loss: 16.791 - mae: 51.736 - mean_q: 61.009 Interval 1363 (681000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5265 1 episodes - episode_reward: 276.626 [276.626, 276.626] - loss: 11.157 - mae: 51.493 - mean_q: 61.433 Interval 1364 (681500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2946 3 episodes - episode_reward: -50.699 [-186.368, 176.753] - loss: 13.889 - mae: 51.821 - mean_q: 61.327 Interval 1365 (682000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2111 1 episodes - episode_reward: -77.917 [-77.917, -77.917] - loss: 12.519 - mae: 51.962 - mean_q: 61.619 Interval 1366 (682500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4015 1 episodes - episode_reward: -286.964 [-286.964, -286.964] - loss: 14.913 - mae: 51.821 - mean_q: 60.767 Interval 1367 (683000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -0.4296 2 episodes - episode_reward: -102.707 [-138.390, -67.023] - loss: 14.906 - mae: 51.853 - mean_q: 60.710 Interval 1368 (683500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.3761 1 episodes - episode_reward: 251.585 [251.585, 251.585] - loss: 12.887 - mae: 52.192 - mean_q: 61.840 Interval 1369 (684000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3669 1 episodes - episode_reward: -16.793 [-16.793, -16.793] - loss: 16.347 - mae: 52.504 - mean_q: 60.745 Interval 1370 (684500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5368 3 episodes - episode_reward: 160.138 [-100.000, 304.760] - loss: 14.069 - mae: 52.229 - mean_q: 60.360 Interval 1371 (685000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0111 Interval 1372 (685500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.2689 1 episodes - episode_reward: 117.602 [117.602, 117.602] - loss: 18.995 - mae: 51.973 - mean_q: 60.487 Interval 1373 (686000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5251 1 episodes - episode_reward: 186.351 [186.351, 186.351] - loss: 15.148 - mae: 51.656 - mean_q: 59.947 Interval 1374 (686500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1837 1 episodes - episode_reward: 160.060 [160.060, 160.060] - loss: 11.068 - mae: 51.438 - mean_q: 60.549 Interval 1375 (687000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2180 1 episodes - episode_reward: -56.922 [-56.922, -56.922] - loss: 14.706 - mae: 51.567 - mean_q: 61.740 Interval 1376 (687500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4353 1 episodes - episode_reward: 206.789 [206.789, 206.789] - loss: 13.544 - mae: 51.544 - mean_q: 61.782 Interval 1377 
(688000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1527 5 episodes - episode_reward: -122.201 [-199.929, -47.148] - loss: 13.905 - mae: 51.494 - mean_q: 61.963 Interval 1378 (688500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5725 1 episodes - episode_reward: 283.845 [283.845, 283.845] - loss: 15.530 - mae: 51.910 - mean_q: 62.744 Interval 1379 (689000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3112 Interval 1380 (689500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9461 4 episodes - episode_reward: -150.353 [-251.211, 14.870] - loss: 15.218 - mae: 52.107 - mean_q: 62.792 Interval 1381 (690000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6797 3 episodes - episode_reward: -141.642 [-146.278, -137.084] - loss: 21.375 - mae: 52.530 - mean_q: 62.900 Interval 1382 (690500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3116 2 episodes - episode_reward: -125.070 [-125.895, -124.245] - loss: 13.866 - mae: 52.331 - mean_q: 62.653 Interval 1383 (691000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0416 Interval 1384 (691500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4407 1 episodes - episode_reward: 241.968 [241.968, 241.968] - loss: 16.518 - mae: 52.117 - mean_q: 62.641 Interval 1385 (692000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2664 1 episodes - episode_reward: 212.253 [212.253, 212.253] - loss: 17.303 - mae: 52.148 - mean_q: 62.125 Interval 1386 (692500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3569 1 episodes - episode_reward: 189.461 [189.461, 189.461] - loss: 12.833 - mae: 52.121 - mean_q: 62.265 Interval 1387 (693000 steps performed) 500/500 
[==============================] - 3s 7ms/step - reward: -0.1478 2 episodes - episode_reward: 25.885 [-146.192, 197.962] - loss: 12.430 - mae: 51.952 - mean_q: 62.434 Interval 1388 (693500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5133 3 episodes - episode_reward: -126.377 [-192.278, -42.876] - loss: 19.043 - mae: 51.871 - mean_q: 62.961 Interval 1389 (694000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0874 2 episodes - episode_reward: 25.027 [-132.092, 182.145] - loss: 13.627 - mae: 52.136 - mean_q: 63.079 Interval 1390 (694500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1367 Interval 1391 (695000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1428 3 episodes - episode_reward: 0.708 [-181.376, 190.917] - loss: 18.760 - mae: 52.202 - mean_q: 63.810 Interval 1392 (695500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4359 1 episodes - episode_reward: 215.991 [215.991, 215.991] - loss: 12.587 - mae: 51.643 - mean_q: 62.901 Interval 1393 (696000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5049 1 episodes - episode_reward: -204.719 [-204.719, -204.719] - loss: 17.396 - mae: 51.118 - mean_q: 63.078 Interval 1394 (696500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1216 1 episodes - episode_reward: -74.912 [-74.912, -74.912] - loss: 12.917 - mae: 51.023 - mean_q: 63.630 Interval 1395 (697000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2002 1 episodes - episode_reward: 182.863 [182.863, 182.863] - loss: 14.322 - mae: 51.064 - mean_q: 63.650 Interval 1396 (697500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2805 5 episodes - episode_reward: -123.631 [-268.636, -67.517] - loss: 15.175 - mae: 50.813 - mean_q: 62.986 
Interval 1397 (698000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6404 3 episodes - episode_reward: -88.966 [-124.939, -45.211] - loss: 15.230 - mae: 51.213 - mean_q: 62.752 Interval 1398 (698500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3258 1 episodes - episode_reward: 113.611 [113.611, 113.611] - loss: 13.323 - mae: 50.626 - mean_q: 61.455 Interval 1399 (699000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5321 1 episodes - episode_reward: -289.538 [-289.538, -289.538] - loss: 13.098 - mae: 50.445 - mean_q: 62.062 Interval 1400 (699500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0597 1 episodes - episode_reward: -66.807 [-66.807, -66.807] - loss: 11.104 - mae: 50.097 - mean_q: 61.111 Interval 1401 (700000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0111 Interval 1402 (700500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3119 3 episodes - episode_reward: -6.611 [-118.313, 209.345] - loss: 13.586 - mae: 49.762 - mean_q: 60.200 Interval 1403 (701000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1070 1 episodes - episode_reward: -114.008 [-114.008, -114.008] - loss: 13.066 - mae: 50.236 - mean_q: 60.587 Interval 1404 (701500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6605 1 episodes - episode_reward: 145.156 [145.156, 145.156] - loss: 10.480 - mae: 49.552 - mean_q: 59.967 Interval 1405 (702000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3810 3 episodes - episode_reward: 126.008 [-17.627, 321.334] - loss: 12.393 - mae: 49.743 - mean_q: 59.967 Interval 1406 (702500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4273 1 episodes - episode_reward: 290.571 [290.571, 290.571] - 
loss: 15.598 - mae: 49.967 - mean_q: 60.693 Interval 1407 (703000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1303 Interval 1408 (703500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0909 Interval 1409 (704000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1325 Interval 1410 (704500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0452 Interval 1411 (705000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1248 1 episodes - episode_reward: -3.824 [-3.824, -3.824] - loss: 14.547 - mae: 48.743 - mean_q: 59.227 Interval 1412 (705500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1364 Interval 1413 (706000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1804 Interval 1414 (706500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1087 Interval 1415 (707000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0363 2 episodes - episode_reward: -130.360 [-139.738, -120.983] - loss: 12.387 - mae: 48.810 - mean_q: 60.813 Interval 1416 (707500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1146 1 episodes - episode_reward: -30.449 [-30.449, -30.449] - loss: 13.316 - mae: 48.373 - mean_q: 60.170 Interval 1417 (708000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2777 2 episodes - episode_reward: 78.366 [-62.156, 218.888] - loss: 12.119 - mae: 47.659 - mean_q: 59.719 Interval 1418 (708500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0944 Interval 1419 (709000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2722 Interval 1420 (709500 steps performed) 500/500 [==============================] - 3s 7ms/step - 
reward: 0.2926 1 episodes - episode_reward: 132.116 [132.116, 132.116] - loss: 12.903 - mae: 47.401 - mean_q: 59.290 Interval 1421 (710000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0328 Interval 1422 (710500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0272 Interval 1423 (711000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5604 3 episodes - episode_reward: -125.408 [-292.113, 117.346] - loss: 12.262 - mae: 46.863 - mean_q: 57.883 Interval 1424 (711500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1861 2 episodes - episode_reward: 77.311 [-122.333, 276.956] - loss: 13.107 - mae: 46.550 - mean_q: 57.881 Interval 1425 (712000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3469 2 episodes - episode_reward: 112.648 [20.973, 204.324] - loss: 12.415 - mae: 46.802 - mean_q: 58.368 Interval 1426 (712500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0457 1 episodes - episode_reward: -46.482 [-46.482, -46.482] - loss: 16.522 - mae: 46.925 - mean_q: 58.258 Interval 1427 (713000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5298 1 episodes - episode_reward: 182.730 [182.730, 182.730] - loss: 15.211 - mae: 46.561 - mean_q: 58.488 Interval 1428 (713500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2498 1 episodes - episode_reward: 233.902 [233.902, 233.902] - loss: 10.252 - mae: 45.869 - mean_q: 58.132 Interval 1429 (714000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0526 Interval 1430 (714500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0569 Interval 1431 (715000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3053 1 episodes - episode_reward: -205.021 
[-205.021, -205.021] - loss: 10.068 - mae: 45.578 - mean_q: 57.322 Interval 1432 (715500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2431 2 episodes - episode_reward: 26.643 [-179.678, 232.965] - loss: 10.836 - mae: 45.215 - mean_q: 56.917 Interval 1433 (716000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8193 4 episodes - episode_reward: -126.983 [-266.553, 28.829] - loss: 16.208 - mae: 45.101 - mean_q: 57.124 Interval 1434 (716500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0685 2 episodes - episode_reward: -22.398 [-249.845, 205.049] - loss: 13.089 - mae: 44.830 - mean_q: 57.046 Interval 1435 (717000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1195 Interval 1436 (717500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0370 Interval 1437 (718000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0972 2 episodes - episode_reward: -59.021 [-206.282, 88.241] - loss: 10.368 - mae: 44.029 - mean_q: 55.429 Interval 1438 (718500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1542 2 episodes - episode_reward: -77.990 [-104.177, -51.803] - loss: 16.097 - mae: 43.873 - mean_q: 54.958 Interval 1439 (719000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3210 1 episodes - episode_reward: 239.457 [239.457, 239.457] - loss: 10.920 - mae: 43.548 - mean_q: 54.975 Interval 1440 (719500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2994 1 episodes - episode_reward: 159.864 [159.864, 159.864] - loss: 15.831 - mae: 43.557 - mean_q: 54.407 Interval 1441 (720000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0579 Interval 1442 (720500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: 0.5061 1 episodes - episode_reward: 171.726 [171.726, 171.726] - loss: 12.214 - mae: 42.924 - mean_q: 53.512 Interval 1443 (721000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3417 2 episodes - episode_reward: -14.618 [-280.280, 251.044] - loss: 10.483 - mae: 43.029 - mean_q: 53.620 Interval 1444 (721500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4397 4 episodes - episode_reward: -47.900 [-128.030, 19.714] - loss: 14.314 - mae: 42.614 - mean_q: 53.078 Interval 1445 (722000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1236 Interval 1446 (722500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0107 Interval 1447 (723000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0525 Interval 1448 (723500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0502 2 episodes - episode_reward: -14.755 [-148.035, 118.525] - loss: 11.693 - mae: 41.762 - mean_q: 51.510 Interval 1449 (724000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4378 2 episodes - episode_reward: 160.368 [23.319, 297.418] - loss: 17.971 - mae: 41.883 - mean_q: 51.326 Interval 1450 (724500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0871 Interval 1451 (725000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0642 Interval 1452 (725500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3306 2 episodes - episode_reward: 43.255 [-2.582, 89.093] - loss: 10.425 - mae: 41.061 - mean_q: 51.353 Interval 1453 (726000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1902 1 episodes - episode_reward: 144.672 [144.672, 144.672] - loss: 9.569 - mae: 40.731 - mean_q: 50.622 Interval 1454 (726500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: 0.0312 1 episodes - episode_reward: 23.426 [23.426, 23.426] - loss: 13.791 - mae: 40.363 - mean_q: 50.598 Interval 1455 (727000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5486 1 episodes - episode_reward: 170.012 [170.012, 170.012] - loss: 12.721 - mae: 40.762 - mean_q: 50.640 Interval 1456 (727500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0753 1 episodes - episode_reward: 210.901 [210.901, 210.901] - loss: 13.703 - mae: 40.836 - mean_q: 50.867 Interval 1457 (728000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3947 3 episodes - episode_reward: -241.387 [-444.507, -110.539] - loss: 10.824 - mae: 40.910 - mean_q: 51.076 Interval 1458 (728500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0350 1 episodes - episode_reward: -38.144 [-38.144, -38.144] - loss: 10.570 - mae: 41.066 - mean_q: 51.219 Interval 1459 (729000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6934 3 episodes - episode_reward: -118.904 [-157.131, -57.770] - loss: 10.972 - mae: 41.359 - mean_q: 51.439 Interval 1460 (729500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0319 1 episodes - episode_reward: -65.215 [-65.215, -65.215] - loss: 11.898 - mae: 41.499 - mean_q: 51.590 Interval 1461 (730000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0770 2 episodes - episode_reward: -4.388 [-246.545, 237.768] - loss: 12.874 - mae: 40.952 - mean_q: 51.200 Interval 1462 (730500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1177 3 episodes - episode_reward: -51.765 [-223.724, 205.518] - loss: 11.746 - mae: 41.174 - mean_q: 50.919 Interval 1463 (731000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0365 Interval 
1464 (731500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0238 Interval 1465 (732000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2208 1 episodes - episode_reward: 213.654 [213.654, 213.654] - loss: 13.896 - mae: 40.923 - mean_q: 51.099 Interval 1466 (732500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5480 4 episodes - episode_reward: -69.034 [-159.609, -2.039] - loss: 12.534 - mae: 40.610 - mean_q: 50.818 Interval 1467 (733000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7518 5 episodes - episode_reward: -65.325 [-107.777, -9.377] - loss: 13.070 - mae: 40.286 - mean_q: 50.880 Interval 1468 (733500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5201 1 episodes - episode_reward: 231.715 [231.715, 231.715] - loss: 14.417 - mae: 40.237 - mean_q: 50.899 Interval 1469 (734000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2640 3 episodes - episode_reward: -48.771 [-112.920, -0.608] - loss: 11.182 - mae: 39.941 - mean_q: 50.623 Interval 1470 (734500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1924 2 episodes - episode_reward: -66.646 [-68.130, -65.162] - loss: 13.310 - mae: 39.641 - mean_q: 50.200 Interval 1471 (735000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7509 2 episodes - episode_reward: 215.833 [193.827, 237.839] - loss: 15.263 - mae: 39.038 - mean_q: 49.327 Interval 1472 (735500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4988 2 episodes - episode_reward: -193.589 [-269.659, -117.520] - loss: 12.508 - mae: 39.289 - mean_q: 49.633 Interval 1473 (736000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2660 1 episodes - episode_reward: 215.033 [215.033, 215.033] - loss: 12.173 - 
mae: 39.081 - mean_q: 49.566 Interval 1474 (736500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2878 2 episodes - episode_reward: -51.999 [-281.522, 177.523] - loss: 12.521 - mae: 39.089 - mean_q: 49.529 Interval 1475 (737000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0814 Interval 1476 (737500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3802 1 episodes - episode_reward: 203.301 [203.301, 203.301] - loss: 13.017 - mae: 38.667 - mean_q: 49.214 Interval 1477 (738000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9496 1 episodes - episode_reward: -406.720 [-406.720, -406.720] - loss: 9.602 - mae: 38.503 - mean_q: 48.975 Interval 1478 (738500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0524 3 episodes - episode_reward: -18.323 [-269.577, 220.764] - loss: 12.835 - mae: 38.454 - mean_q: 48.897 Interval 1479 (739000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0233 1 episodes - episode_reward: -104.585 [-104.585, -104.585] - loss: 11.615 - mae: 38.489 - mean_q: 48.987 Interval 1480 (739500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9176 3 episodes - episode_reward: -112.379 [-407.054, 189.453] - loss: 13.035 - mae: 38.568 - mean_q: 48.744 Interval 1481 (740000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6680 2 episodes - episode_reward: -151.475 [-251.164, -51.786] - loss: 12.516 - mae: 38.553 - mean_q: 48.402 Interval 1482 (740500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4878 1 episodes - episode_reward: 131.453 [131.453, 131.453] - loss: 14.996 - mae: 38.683 - mean_q: 48.425 Interval 1483 (741000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5204 2 episodes - 
episode_reward: 150.740 [17.447, 284.034] - loss: 17.929 - mae: 38.966 - mean_q: 47.842 Interval 1484 (741500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7216 1 episodes - episode_reward: 219.354 [219.354, 219.354] - loss: 14.838 - mae: 39.212 - mean_q: 48.251 Interval 1485 (742000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5662 2 episodes - episode_reward: 151.202 [40.886, 261.519] - loss: 15.602 - mae: 39.236 - mean_q: 48.597 Interval 1486 (742500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3670 2 episodes - episode_reward: 87.918 [-69.062, 244.899] - loss: 19.781 - mae: 39.638 - mean_q: 49.174 Interval 1487 (743000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0314 Interval 1488 (743500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0173 3 episodes - episode_reward: 21.596 [-140.369, 225.784] - loss: 19.416 - mae: 39.382 - mean_q: 48.834 Interval 1489 (744000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 1.0515 2 episodes - episode_reward: 278.794 [276.449, 281.140] - loss: 11.709 - mae: 39.737 - mean_q: 49.509 Interval 1490 (744500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5648 3 episodes - episode_reward: -109.090 [-139.420, -79.500] - loss: 14.988 - mae: 39.993 - mean_q: 49.492 Interval 1491 (745000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7827 5 episodes - episode_reward: -66.632 [-335.292, 330.190] - loss: 14.200 - mae: 39.948 - mean_q: 49.352 Interval 1492 (745500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1471 2 episodes - episode_reward: -43.026 [-146.667, 60.614] - loss: 18.649 - mae: 40.374 - mean_q: 49.587 Interval 1493 (746000 steps performed) 500/500 [==============================] - 3s 
6ms/step - reward: 0.5426 1 episodes - episode_reward: 185.253 [185.253, 185.253] - loss: 13.548 - mae: 39.764 - mean_q: 49.557 Interval 1494 (746500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4365 3 episodes - episode_reward: -28.506 [-207.450, 264.631] - loss: 13.463 - mae: 40.231 - mean_q: 49.545 Interval 1495 (747000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3122 2 episodes - episode_reward: -149.185 [-155.066, -143.304] - loss: 12.305 - mae: 40.569 - mean_q: 49.381 Interval 1496 (747500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8989 2 episodes - episode_reward: 256.729 [234.137, 279.322] - loss: 19.790 - mae: 41.124 - mean_q: 50.485 Interval 1497 (748000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5384 1 episodes - episode_reward: 257.610 [257.610, 257.610] - loss: 11.553 - mae: 41.247 - mean_q: 51.264 Interval 1498 (748500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2601 1 episodes - episode_reward: 198.720 [198.720, 198.720] - loss: 19.150 - mae: 41.457 - mean_q: 51.967 Interval 1499 (749000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3716 2 episodes - episode_reward: 88.875 [-111.849, 289.598] - loss: 16.069 - mae: 42.060 - mean_q: 52.490 Interval 1500 (749500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3337 1 episodes - episode_reward: 181.463 [181.463, 181.463] - loss: 17.164 - mae: 42.560 - mean_q: 53.040 Interval 1501 (750000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4252 1 episodes - episode_reward: 185.063 [185.063, 185.063] - loss: 15.068 - mae: 42.776 - mean_q: 53.996 Interval 1502 (750500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0169 2 episodes - episode_reward: 15.546 [-132.053, 
163.145] - loss: 12.934 - mae: 42.997 - mean_q: 54.902 Interval 1503 (751000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0538 2 episodes - episode_reward: 54.530 [-157.996, 267.056] - loss: 15.468 - mae: 43.423 - mean_q: 54.988 Interval 1504 (751500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0534 Interval 1505 (752000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4579 1 episodes - episode_reward: 87.183 [87.183, 87.183] - loss: 16.088 - mae: 44.334 - mean_q: 55.987 Interval 1506 (752500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1678 1 episodes - episode_reward: -75.676 [-75.676, -75.676] - loss: 25.285 - mae: 44.829 - mean_q: 56.954 Interval 1507 (753000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7636 2 episodes - episode_reward: -194.674 [-258.773, -130.576] - loss: 17.324 - mae: 44.352 - mean_q: 56.584 Interval 1508 (753500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4133 Interval 1509 (754000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8558 5 episodes - episode_reward: -47.690 [-258.497, 313.722] - loss: 15.235 - mae: 45.334 - mean_q: 57.461 Interval 1510 (754500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2321 Interval 1511 (755000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2078 Interval 1512 (755500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0275 Interval 1513 (756000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6089 2 episodes - episode_reward: 160.640 [95.975, 225.306] - loss: 13.230 - mae: 44.632 - mean_q: 57.213 Interval 1514 (756500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
0.1572 2 episodes - episode_reward: 35.385 [-110.713, 181.484] - loss: 17.589 - mae: 44.641 - mean_q: 57.510 Interval 1515 (757000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3543 1 episodes - episode_reward: 173.984 [173.984, 173.984] - loss: 14.063 - mae: 44.702 - mean_q: 57.545 Interval 1516 (757500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2019 Interval 1517 (758000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2464 2 episodes - episode_reward: -2.053 [-197.539, 193.434] - loss: 15.263 - mae: 45.004 - mean_q: 57.913 Interval 1518 (758500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6419 1 episodes - episode_reward: 204.949 [204.949, 204.949] - loss: 13.720 - mae: 45.108 - mean_q: 57.970 Interval 1519 (759000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3282 1 episodes - episode_reward: -108.654 [-108.654, -108.654] - loss: 12.060 - mae: 44.937 - mean_q: 57.431 Interval 1520 (759500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0024 1 episodes - episode_reward: -470.893 [-470.893, -470.893] - loss: 18.242 - mae: 45.253 - mean_q: 57.646 Interval 1521 (760000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3969 1 episodes - episode_reward: 214.746 [214.746, 214.746] - loss: 16.551 - mae: 45.386 - mean_q: 57.971 Interval 1522 (760500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1710 Interval 1523 (761000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2055 1 episodes - episode_reward: 166.641 [166.641, 166.641] - loss: 14.854 - mae: 45.577 - mean_q: 58.096 Interval 1524 (761500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0143 Interval 1525 (762000 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: 0.1760 1 episodes - episode_reward: 97.752 [97.752, 97.752] - loss: 13.034 - mae: 46.061 - mean_q: 57.972 Interval 1526 (762500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4904 1 episodes - episode_reward: -360.579 [-360.579, -360.579] - loss: 13.401 - mae: 45.735 - mean_q: 57.719 Interval 1527 (763000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5116 1 episodes - episode_reward: 254.389 [254.389, 254.389] - loss: 14.115 - mae: 45.818 - mean_q: 58.569 Interval 1528 (763500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3229 2 episodes - episode_reward: 74.307 [-94.014, 242.627] - loss: 15.517 - mae: 45.675 - mean_q: 58.144 Interval 1529 (764000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8058 3 episodes - episode_reward: -260.027 [-384.926, -100.000] - loss: 13.905 - mae: 46.063 - mean_q: 58.055 Interval 1530 (764500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6136 1 episodes - episode_reward: -346.447 [-346.447, -346.447] - loss: 12.321 - mae: 45.885 - mean_q: 57.751 Interval 1531 (765000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7844 2 episodes - episode_reward: -209.310 [-230.426, -188.194] - loss: 14.066 - mae: 46.016 - mean_q: 58.156 Interval 1532 (765500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0509 Interval 1533 (766000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3569 1 episodes - episode_reward: 183.488 [183.488, 183.488] - loss: 14.001 - mae: 45.461 - mean_q: 56.766 Interval 1534 (766500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3625 1 episodes - episode_reward: 122.226 [122.226, 122.226] - loss: 15.168 - mae: 45.094 - mean_q: 56.708 
Interval 1535 (767000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2919 Interval 1536 (767500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0949 Interval 1537 (768000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1856 1 episodes - episode_reward: 88.306 [88.306, 88.306] - loss: 16.882 - mae: 45.174 - mean_q: 57.152 Interval 1538 (768500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1562 2 episodes - episode_reward: 31.750 [-109.969, 173.469] - loss: 15.663 - mae: 45.188 - mean_q: 57.224 Interval 1539 (769000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1073 1 episodes - episode_reward: -50.825 [-50.825, -50.825] - loss: 11.925 - mae: 44.742 - mean_q: 57.211 Interval 1540 (769500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1737 1 episodes - episode_reward: 10.789 [10.789, 10.789] - loss: 13.979 - mae: 45.151 - mean_q: 56.879 Interval 1541 (770000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1967 1 episodes - episode_reward: 170.082 [170.082, 170.082] - loss: 12.215 - mae: 45.226 - mean_q: 56.568 Interval 1542 (770500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1135 Interval 1543 (771000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1543 1 episodes - episode_reward: 189.316 [189.316, 189.316] - loss: 12.768 - mae: 45.204 - mean_q: 57.194 Interval 1544 (771500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0389 Interval 1545 (772000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3126 1 episodes - episode_reward: 83.858 [83.858, 83.858] - loss: 14.031 - mae: 45.300 - mean_q: 56.794 Interval 1546 (772500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.3109 2 episodes - episode_reward: -69.079 [-283.574, 145.417] - loss: 13.199 - mae: 45.168 - mean_q: 56.477 Interval 1547 (773000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2598 Interval 1548 (773500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2828 1 episodes - episode_reward: 24.339 [24.339, 24.339] - loss: 15.427 - mae: 45.274 - mean_q: 56.603 Interval 1549 (774000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0499 Interval 1550 (774500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0179 2 episodes - episode_reward: 35.781 [-100.000, 171.562] - loss: 14.607 - mae: 45.461 - mean_q: 55.814 Interval 1551 (775000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1180 Interval 1552 (775500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5320 1 episodes - episode_reward: 143.147 [143.147, 143.147] - loss: 15.263 - mae: 45.778 - mean_q: 56.460 Interval 1553 (776000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3823 1 episodes - episode_reward: 281.494 [281.494, 281.494] - loss: 14.727 - mae: 45.454 - mean_q: 56.240 Interval 1554 (776500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1338 1 episodes - episode_reward: 199.285 [199.285, 199.285] - loss: 14.206 - mae: 45.680 - mean_q: 56.927 Interval 1555 (777000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1218 1 episodes - episode_reward: -172.138 [-172.138, -172.138] - loss: 13.537 - mae: 45.593 - mean_q: 56.351 Interval 1556 (777500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0264 Interval 1557 (778000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
-0.0301 Interval 1558 (778500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3421 1 episodes - episode_reward: 112.250 [112.250, 112.250] - loss: 12.214 - mae: 44.701 - mean_q: 56.295 Interval 1559 (779000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1239 Interval 1560 (779500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2406 1 episodes - episode_reward: 240.011 [240.011, 240.011] - loss: 10.179 - mae: 44.285 - mean_q: 55.992 Interval 1561 (780000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2900 1 episodes - episode_reward: 168.323 [168.323, 168.323] - loss: 9.691 - mae: 44.293 - mean_q: 56.209 Interval 1562 (780500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9793 2 episodes - episode_reward: -246.612 [-390.381, -102.843] - loss: 14.122 - mae: 43.838 - mean_q: 56.033 Interval 1563 (781000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3680 1 episodes - episode_reward: 215.827 [215.827, 215.827] - loss: 14.514 - mae: 44.485 - mean_q: 56.663 Interval 1564 (781500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1427 Interval 1565 (782000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0263 Interval 1566 (782500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2360 1 episodes - episode_reward: 127.037 [127.037, 127.037] - loss: 9.953 - mae: 43.612 - mean_q: 55.030 Interval 1567 (783000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1477 1 episodes - episode_reward: -78.715 [-78.715, -78.715] - loss: 11.344 - mae: 43.610 - mean_q: 54.921 Interval 1568 (783500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0527 Interval 1569 (784000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.0031 Interval 1570 (784500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2394 1 episodes - episode_reward: 243.368 [243.368, 243.368] - loss: 14.134 - mae: 42.811 - mean_q: 53.709 Interval 1571 (785000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3673 1 episodes - episode_reward: 194.760 [194.760, 194.760] - loss: 12.589 - mae: 42.833 - mean_q: 53.877 Interval 1572 (785500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4677 1 episodes - episode_reward: 218.126 [218.126, 218.126] - loss: 11.606 - mae: 42.715 - mean_q: 54.121 Interval 1573 (786000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0276 Interval 1574 (786500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6365 2 episodes - episode_reward: -164.356 [-443.606, 114.895] - loss: 12.468 - mae: 42.225 - mean_q: 53.434 Interval 1575 (787000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1112 Interval 1576 (787500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4058 1 episodes - episode_reward: 183.865 [183.865, 183.865] - loss: 10.854 - mae: 42.179 - mean_q: 53.534 Interval 1577 (788000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8056 2 episodes - episode_reward: 243.687 [202.876, 284.499] - loss: 10.954 - mae: 42.097 - mean_q: 53.211 Interval 1578 (788500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7537 2 episodes - episode_reward: 204.777 [119.781, 289.773] - loss: 9.872 - mae: 41.946 - mean_q: 53.410 Interval 1579 (789000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4106 3 episodes - episode_reward: -71.977 [-346.919, 230.988] - loss: 11.022 - mae: 41.963 - mean_q: 53.824 
Interval 1580 (789500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1464 Interval 1581 (790000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4895 1 episodes - episode_reward: 167.287 [167.287, 167.287] - loss: 9.468 - mae: 41.602 - mean_q: 53.891 Interval 1582 (790500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0599 2 episodes - episode_reward: 46.570 [-149.163, 242.302] - loss: 13.407 - mae: 41.500 - mean_q: 53.662 Interval 1583 (791000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5456 1 episodes - episode_reward: 209.260 [209.260, 209.260] - loss: 7.872 - mae: 42.043 - mean_q: 54.084 Interval 1584 (791500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4119 1 episodes - episode_reward: 202.409 [202.409, 202.409] - loss: 9.727 - mae: 41.473 - mean_q: 52.909 Interval 1585 (792000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5547 1 episodes - episode_reward: 238.958 [238.958, 238.958] - loss: 9.848 - mae: 41.035 - mean_q: 52.633 Interval 1586 (792500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0244 2 episodes - episode_reward: 61.376 [-148.280, 271.032] - loss: 8.780 - mae: 41.278 - mean_q: 53.021 Interval 1587 (793000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1606 Interval 1588 (793500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1718 2 episodes - episode_reward: 31.230 [-106.865, 169.324] - loss: 11.271 - mae: 41.351 - mean_q: 53.267 Interval 1589 (794000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0129 2 episodes - episode_reward: 44.183 [-124.823, 213.188] - loss: 10.091 - mae: 41.319 - mean_q: 52.614 Interval 1590 (794500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.1893 Interval 1591 (795000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0759 2 episodes - episode_reward: -1.227 [-213.342, 210.888] - loss: 8.999 - mae: 41.489 - mean_q: 53.302 Interval 1592 (795500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1000 2 episodes - episode_reward: -16.499 [-295.866, 262.869] - loss: 9.551 - mae: 41.087 - mean_q: 52.794 Interval 1593 (796000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4613 1 episodes - episode_reward: 270.566 [270.566, 270.566] - loss: 8.319 - mae: 41.296 - mean_q: 52.914 Interval 1594 (796500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4478 1 episodes - episode_reward: 199.421 [199.421, 199.421] - loss: 11.777 - mae: 41.300 - mean_q: 52.439 Interval 1595 (797000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2993 Interval 1596 (797500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1379 Interval 1597 (798000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3612 2 episodes - episode_reward: -44.829 [-187.142, 97.483] - loss: 9.276 - mae: 40.958 - mean_q: 51.748 Interval 1598 (798500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1824 1 episodes - episode_reward: 234.826 [234.826, 234.826] - loss: 10.444 - mae: 41.259 - mean_q: 52.454 Interval 1599 (799000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5072 1 episodes - episode_reward: 206.029 [206.029, 206.029] - loss: 8.994 - mae: 41.230 - mean_q: 51.913 Interval 1600 (799500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4054 1 episodes - episode_reward: 187.983 [187.983, 187.983] - loss: 11.029 - mae: 41.399 - mean_q: 51.704 Interval 
1601 (800000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4258 1 episodes - episode_reward: 227.497 [227.497, 227.497] - loss: 8.006 - mae: 41.285 - mean_q: 51.884 Interval 1602 (800500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2639 1 episodes - episode_reward: -135.658 [-135.658, -135.658] - loss: 7.471 - mae: 41.307 - mean_q: 52.261 Interval 1603 (801000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0395 1 episodes - episode_reward: -90.769 [-90.769, -90.769] - loss: 13.021 - mae: 41.103 - mean_q: 52.358 Interval 1604 (801500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4105 1 episodes - episode_reward: 255.282 [255.282, 255.282] - loss: 11.172 - mae: 41.087 - mean_q: 52.445 Interval 1605 (802000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1793 2 episodes - episode_reward: 24.316 [-150.656, 199.289] - loss: 8.457 - mae: 40.856 - mean_q: 52.307 Interval 1606 (802500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9344 2 episodes - episode_reward: 256.619 [248.711, 264.527] - loss: 8.723 - mae: 40.495 - mean_q: 51.772 Interval 1607 (803000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3255 1 episodes - episode_reward: 180.021 [180.021, 180.021] - loss: 8.226 - mae: 40.447 - mean_q: 52.008 Interval 1608 (803500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3647 1 episodes - episode_reward: 238.830 [238.830, 238.830] - loss: 6.504 - mae: 40.389 - mean_q: 51.739 Interval 1609 (804000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3864 1 episodes - episode_reward: 131.475 [131.475, 131.475] - loss: 7.849 - mae: 40.230 - mean_q: 51.487 Interval 1610 (804500 steps performed) 500/500 [==============================] - 3s 6ms/step 
- reward: 0.0064 1 episodes - episode_reward: 177.119 [177.119, 177.119] - loss: 7.392 - mae: 40.299 - mean_q: 50.908 Interval 1611 (805000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7484 2 episodes - episode_reward: -217.190 [-230.814, -203.565] - loss: 8.696 - mae: 40.045 - mean_q: 50.593 Interval 1612 (805500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0103 1 episodes - episode_reward: -76.942 [-76.942, -76.942] - loss: 8.800 - mae: 40.247 - mean_q: 51.436 Interval 1613 (806000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0350 Interval 1614 (806500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2998 1 episodes - episode_reward: 126.628 [126.628, 126.628] - loss: 7.433 - mae: 40.247 - mean_q: 51.334 Interval 1615 (807000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5324 1 episodes - episode_reward: 218.447 [218.447, 218.447] - loss: 9.773 - mae: 39.920 - mean_q: 50.978 Interval 1616 (807500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0025 1 episodes - episode_reward: 214.560 [214.560, 214.560] - loss: 7.374 - mae: 39.571 - mean_q: 50.414 Interval 1617 (808000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9603 2 episodes - episode_reward: -283.556 [-326.058, -241.055] - loss: 7.763 - mae: 39.796 - mean_q: 50.133 Interval 1618 (808500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1029 1 episodes - episode_reward: -126.437 [-126.437, -126.437] - loss: 12.089 - mae: 39.987 - mean_q: 49.906 Interval 1619 (809000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4794 1 episodes - episode_reward: 171.299 [171.299, 171.299] - loss: 8.735 - mae: 39.803 - mean_q: 50.452 Interval 1620 (809500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: 0.3126 2 episodes - episode_reward: 134.678 [3.332, 266.025] - loss: 8.646 - mae: 39.952 - mean_q: 50.166 Interval 1621 (810000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2735 1 episodes - episode_reward: 248.035 [248.035, 248.035] - loss: 9.781 - mae: 40.084 - mean_q: 50.902 Interval 1622 (810500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1249 3 episodes - episode_reward: -79.580 [-143.830, -35.632] - loss: 9.479 - mae: 39.781 - mean_q: 50.336 Interval 1623 (811000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4852 2 episodes - episode_reward: 157.902 [73.713, 242.090] - loss: 9.911 - mae: 39.418 - mean_q: 50.101 Interval 1624 (811500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1425 2 episodes - episode_reward: -4.428 [-23.698, 14.842] - loss: 9.009 - mae: 40.159 - mean_q: 51.104 Interval 1625 (812000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4570 1 episodes - episode_reward: 170.926 [170.926, 170.926] - loss: 10.762 - mae: 40.031 - mean_q: 50.782 Interval 1626 (812500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0016 1 episodes - episode_reward: -46.387 [-46.387, -46.387] - loss: 9.052 - mae: 40.033 - mean_q: 50.836 Interval 1627 (813000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3022 2 episodes - episode_reward: 97.157 [-8.205, 202.520] - loss: 12.448 - mae: 39.579 - mean_q: 51.049 Interval 1628 (813500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0502 1 episodes - episode_reward: -125.405 [-125.405, -125.405] - loss: 12.852 - mae: 39.800 - mean_q: 51.355 Interval 1629 (814000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4290 2 episodes - 
episode_reward: 162.602 [126.966, 198.238] - loss: 8.769 - mae: 40.028 - mean_q: 51.527 Interval 1630 (814500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1932 1 episodes - episode_reward: -173.325 [-173.325, -173.325] - loss: 8.630 - mae: 40.407 - mean_q: 52.179 Interval 1631 (815000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1557 2 episodes - episode_reward: 30.404 [-100.000, 160.808] - loss: 7.699 - mae: 40.002 - mean_q: 52.059 Interval 1632 (815500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0359 Interval 1633 (816000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6475 3 episodes - episode_reward: 96.217 [-119.138, 209.614] - loss: 8.129 - mae: 39.635 - mean_q: 51.684 Interval 1634 (816500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2391 1 episodes - episode_reward: 230.832 [230.832, 230.832] - loss: 8.197 - mae: 39.896 - mean_q: 52.016 Interval 1635 (817000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5204 3 episodes - episode_reward: -83.753 [-128.861, -6.387] - loss: 10.995 - mae: 40.121 - mean_q: 52.105 Interval 1636 (817500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1717 Interval 1637 (818000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3397 1 episodes - episode_reward: 199.909 [199.909, 199.909] - loss: 11.953 - mae: 40.502 - mean_q: 52.683 Interval 1638 (818500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0839 2 episodes - episode_reward: 32.521 [-91.544, 156.587] - loss: 12.733 - mae: 40.534 - mean_q: 52.953 Interval 1639 (819000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1252 1 episodes - episode_reward: -173.589 [-173.589, -173.589] - loss: 9.856 - mae: 
40.963 - mean_q: 53.180 Interval 1640 (819500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3703 2 episodes - episode_reward: 30.090 [-112.993, 173.173] - loss: 8.688 - mae: 40.960 - mean_q: 53.218 Interval 1641 (820000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0235 Interval 1642 (820500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0114 1 episodes - episode_reward: 187.449 [187.449, 187.449] - loss: 8.736 - mae: 40.787 - mean_q: 53.075 Interval 1643 (821000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0323 1 episodes - episode_reward: -99.568 [-99.568, -99.568] - loss: 11.104 - mae: 41.517 - mean_q: 53.717 Interval 1644 (821500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6920 2 episodes - episode_reward: -162.827 [-558.957, 233.304] - loss: 11.936 - mae: 41.626 - mean_q: 54.265 Interval 1645 (822000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5208 1 episodes - episode_reward: 244.475 [244.475, 244.475] - loss: 12.306 - mae: 41.592 - mean_q: 54.180 Interval 1646 (822500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2826 1 episodes - episode_reward: 265.189 [265.189, 265.189] - loss: 13.190 - mae: 41.458 - mean_q: 53.851 Interval 1647 (823000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0659 Interval 1648 (823500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3683 1 episodes - episode_reward: 158.431 [158.431, 158.431] - loss: 11.312 - mae: 41.592 - mean_q: 54.157 Interval 1649 (824000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0858 1 episodes - episode_reward: 17.295 [17.295, 17.295] - loss: 9.163 - mae: 41.830 - mean_q: 54.081 Interval 1650 (824500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.0185 Interval 1651 (825000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3967 4 episodes - episode_reward: -38.574 [-133.077, 153.894] - loss: 8.543 - mae: 41.801 - mean_q: 53.914 Interval 1652 (825500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3974 1 episodes - episode_reward: 191.345 [191.345, 191.345] - loss: 10.710 - mae: 41.882 - mean_q: 54.199 Interval 1653 (826000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1738 3 episodes - episode_reward: 7.638 [-121.554, 244.467] - loss: 10.826 - mae: 42.093 - mean_q: 54.304 Interval 1654 (826500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0316 Interval 1655 (827000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1220 Interval 1656 (827500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4472 1 episodes - episode_reward: 133.381 [133.381, 133.381] - loss: 9.355 - mae: 41.380 - mean_q: 53.232 Interval 1657 (828000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0620 Interval 1658 (828500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0217 1 episodes - episode_reward: 130.761 [130.761, 130.761] - loss: 8.147 - mae: 41.522 - mean_q: 53.558 Interval 1659 (829000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1758 Interval 1660 (829500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1886 2 episodes - episode_reward: -5.572 [-177.929, 166.785] - loss: 9.320 - mae: 41.549 - mean_q: 53.195 Interval 1661 (830000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2258 1 episodes - episode_reward: 204.879 [204.879, 204.879] - loss: 10.438 - mae: 41.585 - mean_q: 52.774 
Interval 1662 (830500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3471 2 episodes - episode_reward: -170.141 [-267.327, -72.954] - loss: 6.701 - mae: 41.690 - mean_q: 52.910 Interval 1663 (831000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3068 2 episodes - episode_reward: 32.817 [-127.595, 193.229] - loss: 10.412 - mae: 41.841 - mean_q: 53.257 Interval 1664 (831500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4585 1 episodes - episode_reward: 243.289 [243.289, 243.289] - loss: 7.697 - mae: 41.538 - mean_q: 52.526 Interval 1665 (832000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4955 1 episodes - episode_reward: 230.034 [230.034, 230.034] - loss: 8.807 - mae: 41.310 - mean_q: 51.833 Interval 1666 (832500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4799 1 episodes - episode_reward: 247.116 [247.116, 247.116] - loss: 8.131 - mae: 41.162 - mean_q: 52.155 Interval 1667 (833000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6426 2 episodes - episode_reward: 243.183 [241.089, 245.277] - loss: 7.534 - mae: 40.827 - mean_q: 51.888 Interval 1668 (833500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3092 2 episodes - episode_reward: 16.482 [-146.056, 179.021] - loss: 8.572 - mae: 41.176 - mean_q: 52.302 Interval 1669 (834000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7690 2 episodes - episode_reward: 242.158 [236.444, 247.871] - loss: 7.961 - mae: 41.163 - mean_q: 51.957 Interval 1670 (834500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5589 1 episodes - episode_reward: 242.898 [242.898, 242.898] - loss: 11.926 - mae: 41.692 - mean_q: 52.445 Interval 1671 (835000 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: 0.0592 2 episodes - episode_reward: -16.948 [-263.942, 230.046] - loss: 7.118 - mae: 42.041 - mean_q: 53.069 Interval 1672 (835500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1432 2 episodes - episode_reward: 31.856 [-170.449, 234.161] - loss: 8.213 - mae: 41.393 - mean_q: 52.536 Interval 1673 (836000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1561 2 episodes - episode_reward: -84.946 [-130.345, -39.548] - loss: 8.221 - mae: 41.406 - mean_q: 51.754 Interval 1674 (836500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5736 1 episodes - episode_reward: 173.610 [173.610, 173.610] - loss: 9.029 - mae: 41.374 - mean_q: 52.208 Interval 1675 (837000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1821 3 episodes - episode_reward: 20.545 [-136.124, 267.415] - loss: 8.859 - mae: 41.703 - mean_q: 52.402 Interval 1676 (837500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9313 1 episodes - episode_reward: -453.033 [-453.033, -453.033] - loss: 8.341 - mae: 41.464 - mean_q: 51.918 Interval 1677 (838000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1466 Interval 1678 (838500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5397 1 episodes - episode_reward: -298.492 [-298.492, -298.492] - loss: 9.923 - mae: 41.057 - mean_q: 51.683 Interval 1679 (839000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3413 1 episodes - episode_reward: 224.888 [224.888, 224.888] - loss: 7.248 - mae: 40.967 - mean_q: 51.669 Interval 1680 (839500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0861 1 episodes - episode_reward: 1.524 [1.524, 1.524] - loss: 6.384 - mae: 40.890 - mean_q: 51.431 Interval 1681 (840000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.0358 Interval 1682 (840500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2657 2 episodes - episode_reward: 34.989 [-101.734, 171.712] - loss: 9.257 - mae: 40.710 - mean_q: 50.922 Interval 1683 (841000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1581 3 episodes - episode_reward: -21.091 [-198.870, 238.227] - loss: 10.494 - mae: 40.600 - mean_q: 50.924 Interval 1684 (841500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1346 3 episodes - episode_reward: 37.587 [-103.310, 219.459] - loss: 9.472 - mae: 40.924 - mean_q: 50.858 Interval 1685 (842000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2074 1 episodes - episode_reward: 47.233 [47.233, 47.233] - loss: 9.162 - mae: 40.686 - mean_q: 50.726 Interval 1686 (842500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0046 Interval 1687 (843000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5587 1 episodes - episode_reward: 233.566 [233.566, 233.566] - loss: 7.404 - mae: 40.773 - mean_q: 50.796 Interval 1688 (843500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1390 1 episodes - episode_reward: 282.099 [282.099, 282.099] - loss: 11.064 - mae: 40.898 - mean_q: 51.270 Interval 1689 (844000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4522 1 episodes - episode_reward: 194.161 [194.161, 194.161] - loss: 10.793 - mae: 41.121 - mean_q: 51.595 Interval 1690 (844500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4264 1 episodes - episode_reward: 198.019 [198.019, 198.019] - loss: 8.750 - mae: 41.189 - mean_q: 51.197 Interval 1691 (845000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2885 1 episodes - 
episode_reward: 145.098 [145.098, 145.098] - loss: 7.690 - mae: 41.198 - mean_q: 51.454 Interval 1692 (845500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0071 Interval 1693 (846000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3051 1 episodes - episode_reward: 116.612 [116.612, 116.612] - loss: 8.335 - mae: 41.162 - mean_q: 51.883 Interval 1694 (846500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2934 2 episodes - episode_reward: 107.700 [9.922, 205.478] - loss: 8.753 - mae: 41.012 - mean_q: 51.428 Interval 1695 (847000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0303 1 episodes - episode_reward: -31.956 [-31.956, -31.956] - loss: 8.338 - mae: 41.524 - mean_q: 52.046 Interval 1696 (847500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2532 1 episodes - episode_reward: 172.704 [172.704, 172.704] - loss: 9.629 - mae: 41.036 - mean_q: 51.571 Interval 1697 (848000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1006 1 episodes - episode_reward: -63.289 [-63.289, -63.289] - loss: 9.686 - mae: 41.307 - mean_q: 51.928 Interval 1698 (848500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0280 Interval 1699 (849000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0257 Interval 1700 (849500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0294 Interval 1701 (850000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3967 1 episodes - episode_reward: 181.650 [181.650, 181.650] - loss: 9.970 - mae: 40.951 - mean_q: 51.502 Interval 1702 (850500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1056 4 episodes - episode_reward: 6.989 [-315.515, 254.957] - loss: 10.541 - mae: 
41.281 - mean_q: 52.230 Interval 1703 (851000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3801 1 episodes - episode_reward: 215.847 [215.847, 215.847] - loss: 7.346 - mae: 41.245 - mean_q: 52.205 Interval 1704 (851500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4554 3 episodes - episode_reward: 19.929 [-115.473, 271.993] - loss: 5.840 - mae: 41.515 - mean_q: 52.484 Interval 1705 (852000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4768 1 episodes - episode_reward: 242.881 [242.881, 242.881] - loss: 8.545 - mae: 41.332 - mean_q: 52.278 Interval 1706 (852500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2983 1 episodes - episode_reward: 260.888 [260.888, 260.888] - loss: 7.967 - mae: 41.132 - mean_q: 52.005 Interval 1707 (853000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3224 3 episodes - episode_reward: -38.344 [-89.940, 12.367] - loss: 9.643 - mae: 41.135 - mean_q: 52.139 Interval 1708 (853500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2987 1 episodes - episode_reward: 74.400 [74.400, 74.400] - loss: 8.835 - mae: 41.849 - mean_q: 52.857 Interval 1709 (854000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0231 2 episodes - episode_reward: 67.273 [-49.654, 184.201] - loss: 8.802 - mae: 41.871 - mean_q: 52.239 Interval 1710 (854500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4073 Interval 1711 (855000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0207 2 episodes - episode_reward: 36.567 [-164.888, 238.022] - loss: 7.588 - mae: 41.604 - mean_q: 52.442 Interval 1712 (855500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0852 2 episodes - episode_reward: 74.976 [-104.169, 254.120] - 
loss: 13.999 - mae: 41.806 - mean_q: 53.018 Interval 1713 (856000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8897 1 episodes - episode_reward: 263.447 [263.447, 263.447] - loss: 8.939 - mae: 42.052 - mean_q: 53.241 Interval 1714 (856500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2283 3 episodes - episode_reward: 61.636 [-108.289, 290.726] - loss: 10.114 - mae: 42.029 - mean_q: 53.020 Interval 1715 (857000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4858 1 episodes - episode_reward: 203.278 [203.278, 203.278] - loss: 10.059 - mae: 42.080 - mean_q: 52.411 Interval 1716 (857500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5905 1 episodes - episode_reward: 238.307 [238.307, 238.307] - loss: 9.221 - mae: 42.175 - mean_q: 52.966 Interval 1717 (858000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5873 2 episodes - episode_reward: 162.951 [45.329, 280.573] - loss: 10.812 - mae: 42.521 - mean_q: 53.134 Interval 1718 (858500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.9068 2 episodes - episode_reward: 232.863 [216.326, 249.399] - loss: 10.245 - mae: 42.301 - mean_q: 53.241 Interval 1719 (859000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3093 2 episodes - episode_reward: 63.708 [-100.000, 227.417] - loss: 9.784 - mae: 42.485 - mean_q: 53.596 Interval 1720 (859500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5366 1 episodes - episode_reward: 235.621 [235.621, 235.621] - loss: 8.969 - mae: 42.461 - mean_q: 53.464 Interval 1721 (860000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6996 3 episodes - episode_reward: -64.467 [-431.538, 277.537] - loss: 10.832 - mae: 42.236 - mean_q: 52.828 Interval 1722 (860500 steps performed) 
500/500 [==============================] - 3s 5ms/step - reward: 0.5556 1 episodes - episode_reward: 219.032 [219.032, 219.032] - loss: 8.635 - mae: 42.001 - mean_q: 52.652 Interval 1723 (861000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0179 2 episodes - episode_reward: 7.395 [-192.335, 207.126] - loss: 10.600 - mae: 42.013 - mean_q: 52.512 Interval 1724 (861500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1988 3 episodes - episode_reward: 21.005 [-134.576, 246.749] - loss: 9.871 - mae: 41.572 - mean_q: 52.251 Interval 1725 (862000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0229 Interval 1726 (862500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2531 1 episodes - episode_reward: 188.802 [188.802, 188.802] - loss: 7.868 - mae: 41.699 - mean_q: 52.617 Interval 1727 (863000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1133 3 episodes - episode_reward: 4.383 [-161.126, 210.111] - loss: 11.684 - mae: 41.414 - mean_q: 51.803 Interval 1728 (863500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1839 Interval 1729 (864000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.9046 2 episodes - episode_reward: 229.144 [165.502, 292.787] - loss: 10.465 - mae: 41.511 - mean_q: 52.280 Interval 1730 (864500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2526 2 episodes - episode_reward: 53.200 [-124.461, 230.861] - loss: 12.506 - mae: 41.933 - mean_q: 52.435 Interval 1731 (865000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.5190 1 episodes - episode_reward: 206.009 [206.009, 206.009] - loss: 12.570 - mae: 42.052 - mean_q: 52.849 Interval 1732 (865500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1491 2 
episodes - episode_reward: 29.304 [-175.478, 234.087] - loss: 11.461 - mae: 42.487 - mean_q: 53.236 Interval 1733 (866000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.4987 1 episodes - episode_reward: 250.826 [250.826, 250.826] - loss: 17.014 - mae: 42.481 - mean_q: 53.230 Interval 1734 (866500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6722 2 episodes - episode_reward: -9.684 [-281.763, 262.395] - loss: 11.738 - mae: 42.719 - mean_q: 53.707 Interval 1735 (867000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.3278 6 episodes - episode_reward: -229.758 [-482.907, -100.000] - loss: 12.401 - mae: 42.886 - mean_q: 54.208 Interval 1736 (867500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1449 1 episodes - episode_reward: 20.235 [20.235, 20.235] - loss: 11.996 - mae: 42.838 - mean_q: 53.734 Interval 1737 (868000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.7383 2 episodes - episode_reward: 223.579 [193.515, 253.643] - loss: 9.610 - mae: 42.933 - mean_q: 53.874 Interval 1738 (868500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0728 2 episodes - episode_reward: -45.765 [-80.200, -11.330] - loss: 11.458 - mae: 43.410 - mean_q: 54.527 Interval 1739 (869000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.3036 1 episodes - episode_reward: 242.372 [242.372, 242.372] - loss: 7.989 - mae: 43.378 - mean_q: 54.712 Interval 1740 (869500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1899 Interval 1741 (870000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3847 3 episodes - episode_reward: 100.819 [-100.000, 224.115] - loss: 10.847 - mae: 43.826 - mean_q: 55.084 Interval 1742 (870500 steps performed) 500/500 [==============================] - 
4s 7ms/step - reward: 0.6447 1 episodes - episode_reward: 257.936 [257.936, 257.936] - loss: 8.199 - mae: 43.890 - mean_q: 55.474 Interval 1743 (871000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1477 Interval 1744 (871500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0228 2 episodes - episode_reward: 20.730 [-184.843, 226.302] - loss: 10.960 - mae: 44.337 - mean_q: 55.936 Interval 1745 (872000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1811 Interval 1746 (872500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8808 3 episodes - episode_reward: -103.407 [-618.383, 276.096] - loss: 10.141 - mae: 44.294 - mean_q: 56.093 Interval 1747 (873000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5621 1 episodes - episode_reward: 225.172 [225.172, 225.172] - loss: 10.293 - mae: 44.656 - mean_q: 56.057 Interval 1748 (873500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5073 2 episodes - episode_reward: 138.514 [20.146, 256.883] - loss: 11.141 - mae: 44.838 - mean_q: 56.877 Interval 1749 (874000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.5296 1 episodes - episode_reward: 239.609 [239.609, 239.609] - loss: 9.943 - mae: 45.018 - mean_q: 57.011 Interval 1750 (874500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0605 2 episodes - episode_reward: 84.401 [-25.113, 193.914] - loss: 10.956 - mae: 45.027 - mean_q: 56.842 Interval 1751 (875000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2588 Interval 1752 (875500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2171 2 episodes - episode_reward: 42.132 [-100.165, 184.429] - loss: 11.686 - mae: 45.645 - mean_q: 57.676 Interval 1753 (876000 steps performed) 
500/500 [==============================] - 3s 5ms/step - reward: 0.2393 2 episodes - episode_reward: 56.630 [-119.216, 232.475] - loss: 9.295 - mae: 45.925 - mean_q: 57.932 Interval 1754 (876500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0279 1 episodes - episode_reward: 217.617 [217.617, 217.617] - loss: 11.504 - mae: 45.290 - mean_q: 57.125 Interval 1755 (877000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4893 1 episodes - episode_reward: -282.466 [-282.466, -282.466] - loss: 9.520 - mae: 45.643 - mean_q: 57.430 Interval 1756 (877500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3655 3 episodes - episode_reward: -127.044 [-209.149, -69.955] - loss: 10.603 - mae: 45.830 - mean_q: 57.635 Interval 1757 (878000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5450 2 episodes - episode_reward: 137.200 [25.952, 248.448] - loss: 10.914 - mae: 45.875 - mean_q: 58.297 Interval 1758 (878500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2979 2 episodes - episode_reward: 19.855 [-216.081, 255.791] - loss: 11.387 - mae: 45.666 - mean_q: 58.126 Interval 1759 (879000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1340 1 episodes - episode_reward: -172.666 [-172.666, -172.666] - loss: 11.623 - mae: 45.491 - mean_q: 58.213 Interval 1760 (879500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3775 3 episodes - episode_reward: -28.249 [-125.618, 162.664] - loss: 10.741 - mae: 44.996 - mean_q: 57.332 Interval 1761 (880000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4972 2 episodes - episode_reward: 36.705 [-83.529, 156.938] - loss: 13.166 - mae: 45.178 - mean_q: 57.661 Interval 1762 (880500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3057 
4 episodes - episode_reward: -21.660 [-113.188, 237.799] - loss: 13.464 - mae: 44.933 - mean_q: 56.927 Interval 1763 (881000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0687 1 episodes - episode_reward: 149.545 [149.545, 149.545] - loss: 9.383 - mae: 44.723 - mean_q: 57.224 Interval 1764 (881500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1655 2 episodes - episode_reward: 12.757 [-211.462, 236.976] - loss: 11.624 - mae: 44.616 - mean_q: 56.605 Interval 1765 (882000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2996 2 episodes - episode_reward: 60.642 [-62.941, 184.225] - loss: 10.810 - mae: 44.475 - mean_q: 56.612 Interval 1766 (882500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0803 1 episodes - episode_reward: -114.597 [-114.597, -114.597] - loss: 12.438 - mae: 44.405 - mean_q: 56.137 Interval 1767 (883000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0617 3 episodes - episode_reward: -28.293 [-129.798, 144.920] - loss: 10.819 - mae: 44.540 - mean_q: 56.177 Interval 1768 (883500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1829 1 episodes - episode_reward: 202.970 [202.970, 202.970] - loss: 10.092 - mae: 44.174 - mean_q: 55.477 Interval 1769 (884000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3828 1 episodes - episode_reward: 115.009 [115.009, 115.009] - loss: 10.815 - mae: 44.529 - mean_q: 56.077 Interval 1770 (884500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3922 2 episodes - episode_reward: -77.037 [-120.331, -33.742] - loss: 11.487 - mae: 44.256 - mean_q: 55.755 Interval 1771 (885000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1525 Interval 1772 (885500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: 0.1669 2 episodes - episode_reward: 43.197 [-111.329, 197.724] - loss: 14.612 - mae: 43.678 - mean_q: 55.220 Interval 1773 (886000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1866 3 episodes - episode_reward: -144.520 [-465.707, 193.909] - loss: 12.450 - mae: 43.229 - mean_q: 54.248 Interval 1774 (886500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0058 1 episodes - episode_reward: -201.759 [-201.759, -201.759] - loss: 11.206 - mae: 42.830 - mean_q: 53.624 Interval 1775 (887000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0950 3 episodes - episode_reward: 28.324 [-135.207, 249.738] - loss: 11.811 - mae: 42.440 - mean_q: 52.634 Interval 1776 (887500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6628 4 episodes - episode_reward: -89.960 [-137.453, -31.198] - loss: 14.814 - mae: 42.544 - mean_q: 52.473 Interval 1777 (888000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0382 1 episodes - episode_reward: -74.123 [-74.123, -74.123] - loss: 12.642 - mae: 42.744 - mean_q: 52.576 Interval 1778 (888500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0177 2 episodes - episode_reward: 61.912 [-98.977, 222.801] - loss: 9.921 - mae: 42.280 - mean_q: 52.004 Interval 1779 (889000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0485 3 episodes - episode_reward: -36.990 [-210.006, 205.399] - loss: 9.966 - mae: 41.856 - mean_q: 51.874 Interval 1780 (889500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2439 2 episodes - episode_reward: 97.365 [-108.056, 302.786] - loss: 11.356 - mae: 41.978 - mean_q: 51.503 Interval 1781 (890000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4716 1 
episodes - episode_reward: 228.559 [228.559, 228.559] - loss: 15.062 - mae: 41.825 - mean_q: 51.421 Interval 1782 (890500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1878 1 episodes - episode_reward: 191.353 [191.353, 191.353] - loss: 12.562 - mae: 41.496 - mean_q: 51.224 Interval 1783 (891000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1673 Interval 1784 (891500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1609 Interval 1785 (892000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1649 3 episodes - episode_reward: -101.385 [-237.347, 59.817] - loss: 10.189 - mae: 40.391 - mean_q: 50.495 Interval 1786 (892500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4838 1 episodes - episode_reward: 293.267 [293.267, 293.267] - loss: 10.711 - mae: 40.681 - mean_q: 51.347 Interval 1787 (893000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1639 1 episodes - episode_reward: 212.366 [212.366, 212.366] - loss: 11.435 - mae: 40.801 - mean_q: 50.894 Interval 1788 (893500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2667 Interval 1789 (894000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3253 1 episodes - episode_reward: 224.134 [224.134, 224.134] - loss: 9.213 - mae: 40.954 - mean_q: 51.647 Interval 1790 (894500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3021 1 episodes - episode_reward: -85.659 [-85.659, -85.659] - loss: 11.500 - mae: 40.638 - mean_q: 50.608 Interval 1791 (895000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0665 2 episodes - episode_reward: 7.379 [-165.954, 180.712] - loss: 12.074 - mae: 41.149 - mean_q: 51.364 Interval 1792 (895500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.2249 1 episodes - episode_reward: -38.049 [-38.049, -38.049] - loss: 11.068 - mae: 41.318 - mean_q: 51.138 Interval 1793 (896000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2766 1 episodes - episode_reward: -281.182 [-281.182, -281.182] - loss: 12.447 - mae: 41.503 - mean_q: 50.902 Interval 1794 (896500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.7415 2 episodes - episode_reward: 237.454 [201.213, 273.695] - loss: 15.187 - mae: 41.535 - mean_q: 51.259 Interval 1795 (897000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2567 2 episodes - episode_reward: -66.528 [-74.105, -58.952] - loss: 11.856 - mae: 41.561 - mean_q: 50.984 Interval 1796 (897500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4904 1 episodes - episode_reward: 178.545 [178.545, 178.545] - loss: 12.881 - mae: 41.929 - mean_q: 51.429 Interval 1797 (898000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4510 1 episodes - episode_reward: 205.963 [205.963, 205.963] - loss: 12.340 - mae: 41.709 - mean_q: 50.802 Interval 1798 (898500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1271 Interval 1799 (899000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1066 Interval 1800 (899500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.3070 5 episodes - episode_reward: 42.546 [-100.000, 315.892] - loss: 11.801 - mae: 42.255 - mean_q: 52.140 Interval 1801 (900000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1075 Interval 1802 (900500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4266 1 episodes - episode_reward: 110.063 [110.063, 110.063] - loss: 13.171 - mae: 42.434 - mean_q: 52.194 
Interval 1803 (901000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.4758 1 episodes - episode_reward: 233.613 [233.613, 233.613] - loss: 14.564 - mae: 42.491 - mean_q: 52.349 Interval 1804 (901500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5599 1 episodes - episode_reward: -208.740 [-208.740, -208.740] - loss: 10.987 - mae: 42.456 - mean_q: 52.589 Interval 1805 (902000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1938 2 episodes - episode_reward: -33.388 [-285.057, 218.281] - loss: 13.744 - mae: 42.481 - mean_q: 52.128 Interval 1806 (902500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1576 2 episodes - episode_reward: -107.184 [-116.789, -97.579] - loss: 15.014 - mae: 42.616 - mean_q: 52.372 Interval 1807 (903000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0258 1 episodes - episode_reward: 206.846 [206.846, 206.846] - loss: 11.742 - mae: 42.400 - mean_q: 51.787 Interval 1808 (903500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3410 1 episodes - episode_reward: -232.204 [-232.204, -232.204] - loss: 10.893 - mae: 42.443 - mean_q: 52.628 Interval 1809 (904000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4954 1 episodes - episode_reward: -264.352 [-264.352, -264.352] - loss: 13.324 - mae: 42.105 - mean_q: 52.005 Interval 1810 (904500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6116 1 episodes - episode_reward: -318.257 [-318.257, -318.257] - loss: 14.195 - mae: 42.044 - mean_q: 52.205 Interval 1811 (905000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0246 2 episodes - episode_reward: -1.281 [-214.706, 212.143] - loss: 13.842 - mae: 41.644 - mean_q: 51.567 Interval 1812 (905500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: 0.6556 1 episodes - episode_reward: 248.285 [248.285, 248.285] - loss: 13.109 - mae: 41.790 - mean_q: 51.845 Interval 1813 (906000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0993 Interval 1814 (906500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0746 2 episodes - episode_reward: 2.854 [-147.432, 153.140] - loss: 12.929 - mae: 41.503 - mean_q: 51.241 Interval 1815 (907000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2852 1 episodes - episode_reward: -80.461 [-80.461, -80.461] - loss: 14.241 - mae: 41.544 - mean_q: 51.317 Interval 1816 (907500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1436 1 episodes - episode_reward: -123.139 [-123.139, -123.139] - loss: 13.956 - mae: 41.504 - mean_q: 50.735 Interval 1817 (908000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.5079 1 episodes - episode_reward: 140.962 [140.962, 140.962] - loss: 10.213 - mae: 41.847 - mean_q: 51.281 Interval 1818 (908500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0464 1 episodes - episode_reward: 271.025 [271.025, 271.025] - loss: 11.845 - mae: 42.075 - mean_q: 51.344 Interval 1819 (909000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3664 2 episodes - episode_reward: 9.196 [-210.200, 228.593] - loss: 13.746 - mae: 42.153 - mean_q: 51.973 Interval 1820 (909500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.1706 Interval 1821 (910000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.2655 2 episodes - episode_reward: 86.731 [-12.025, 185.487] - loss: 12.190 - mae: 42.151 - mean_q: 52.268 Interval 1822 (910500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4715 3 
episodes - episode_reward: -46.007 [-147.575, 10.103] - loss: 14.510 - mae: 42.268 - mean_q: 52.387 Interval 1823 (911000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1729 Interval 1824 (911500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0293 Interval 1825 (912000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0573 Interval 1826 (912500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0442 Interval 1827 (913000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3807 1 episodes - episode_reward: 50.460 [50.460, 50.460] - loss: 11.901 - mae: 40.664 - mean_q: 49.885 Interval 1828 (913500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3924 1 episodes - episode_reward: 233.308 [233.308, 233.308] - loss: 10.085 - mae: 40.882 - mean_q: 50.087 Interval 1829 (914000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4802 1 episodes - episode_reward: 193.639 [193.639, 193.639] - loss: 11.433 - mae: 40.587 - mean_q: 49.504 Interval 1830 (914500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0926 1 episodes - episode_reward: 236.167 [236.167, 236.167] - loss: 9.991 - mae: 40.983 - mean_q: 50.141 Interval 1831 (915000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1525 1 episodes - episode_reward: -85.410 [-85.410, -85.410] - loss: 12.374 - mae: 40.924 - mean_q: 49.892 Interval 1832 (915500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4706 2 episodes - episode_reward: -74.251 [-132.551, -15.950] - loss: 13.139 - mae: 40.464 - mean_q: 49.744 Interval 1833 (916000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2792 1 episodes - episode_reward: 132.785 [132.785, 132.785] - loss: 
12.422 - mae: 40.653 - mean_q: 49.479 Interval 1834 (916500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2723 Interval 1835 (917000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0566 2 episodes - episode_reward: 46.079 [-159.140, 251.299] - loss: 12.950 - mae: 40.511 - mean_q: 49.758 Interval 1836 (917500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0551 1 episodes - episode_reward: -19.641 [-19.641, -19.641] - loss: 11.423 - mae: 41.032 - mean_q: 50.050 Interval 1837 (918000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0676 1 episodes - episode_reward: -42.105 [-42.105, -42.105] - loss: 11.735 - mae: 40.578 - mean_q: 49.185 Interval 1838 (918500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1484 2 episodes - episode_reward: 64.226 [-100.000, 228.451] - loss: 9.919 - mae: 41.050 - mean_q: 49.822 Interval 1839 (919000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1365 1 episodes - episode_reward: 110.628 [110.628, 110.628] - loss: 12.346 - mae: 40.557 - mean_q: 49.714 Interval 1840 (919500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3800 1 episodes - episode_reward: 188.207 [188.207, 188.207] - loss: 11.560 - mae: 40.957 - mean_q: 50.522 Interval 1841 (920000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4581 1 episodes - episode_reward: 196.913 [196.913, 196.913] - loss: 10.633 - mae: 40.974 - mean_q: 50.225 Interval 1842 (920500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0455 Interval 1843 (921000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6606 2 episodes - episode_reward: 195.492 [152.155, 238.830] - loss: 10.862 - mae: 40.946 - mean_q: 49.927 Interval 1844 (921500 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0493 Interval 1845 (922000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2265 2 episodes - episode_reward: 7.423 [-100.000, 114.847] - loss: 9.820 - mae: 40.927 - mean_q: 50.783 Interval 1846 (922500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2127 1 episodes - episode_reward: 208.070 [208.070, 208.070] - loss: 12.047 - mae: 41.188 - mean_q: 51.079 Interval 1847 (923000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4465 2 episodes - episode_reward: -145.338 [-241.758, -48.918] - loss: 14.371 - mae: 41.063 - mean_q: 51.263 Interval 1848 (923500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4104 1 episodes - episode_reward: 243.747 [243.747, 243.747] - loss: 9.469 - mae: 40.735 - mean_q: 50.818 Interval 1849 (924000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9352 3 episodes - episode_reward: -141.902 [-196.435, -100.000] - loss: 9.912 - mae: 40.898 - mean_q: 51.076 Interval 1850 (924500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9149 2 episodes - episode_reward: -251.025 [-360.299, -141.750] - loss: 11.903 - mae: 40.670 - mean_q: 50.609 Interval 1851 (925000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1891 1 episodes - episode_reward: 140.384 [140.384, 140.384] - loss: 13.257 - mae: 41.012 - mean_q: 50.716 Interval 1852 (925500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2420 Interval 1853 (926000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1559 1 episodes - episode_reward: 262.550 [262.550, 262.550] - loss: 12.578 - mae: 40.017 - mean_q: 49.174 Interval 1854 (926500 steps performed) 500/500 [==============================] - 3s 6ms/step 
- reward: -0.1477 Interval 1855 (927000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1081 Interval 1856 (927500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1737 Interval 1857 (928000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0249 Interval 1858 (928500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0840 1 episodes - episode_reward: -383.429 [-383.429, -383.429] - loss: 11.935 - mae: 39.200 - mean_q: 48.999 Interval 1859 (929000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1417 1 episodes - episode_reward: -198.631 [-198.631, -198.631] - loss: 12.699 - mae: 39.170 - mean_q: 48.452 Interval 1860 (929500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1283 1 episodes - episode_reward: 266.635 [266.635, 266.635] - loss: 11.764 - mae: 38.962 - mean_q: 48.578 Interval 1861 (930000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0139 1 episodes - episode_reward: -172.835 [-172.835, -172.835] - loss: 9.981 - mae: 38.798 - mean_q: 48.312 Interval 1862 (930500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4654 1 episodes - episode_reward: 248.016 [248.016, 248.016] - loss: 14.097 - mae: 39.262 - mean_q: 49.262 Interval 1863 (931000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4569 1 episodes - episode_reward: 281.638 [281.638, 281.638] - loss: 12.229 - mae: 39.475 - mean_q: 49.982 Interval 1864 (931500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2805 1 episodes - episode_reward: 178.518 [178.518, 178.518] - loss: 12.291 - mae: 39.534 - mean_q: 49.930 Interval 1865 (932000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2436 Interval 1866 (932500 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3826 Interval 1867 (933000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2593 Interval 1868 (933500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3935 1 episodes - episode_reward: -28.879 [-28.879, -28.879] - loss: 11.555 - mae: 39.898 - mean_q: 51.329 Interval 1869 (934000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2137 1 episodes - episode_reward: 164.711 [164.711, 164.711] - loss: 10.027 - mae: 39.536 - mean_q: 50.874 Interval 1870 (934500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1576 2 episodes - episode_reward: 47.482 [-149.010, 243.974] - loss: 10.356 - mae: 39.375 - mean_q: 50.992 Interval 1871 (935000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5974 1 episodes - episode_reward: 190.819 [190.819, 190.819] - loss: 12.030 - mae: 39.726 - mean_q: 51.146 Interval 1872 (935500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1235 1 episodes - episode_reward: 223.976 [223.976, 223.976] - loss: 14.393 - mae: 39.639 - mean_q: 51.533 Interval 1873 (936000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5052 3 episodes - episode_reward: -94.357 [-218.184, 36.645] - loss: 12.031 - mae: 39.761 - mean_q: 51.687 Interval 1874 (936500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0391 Interval 1875 (937000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4142 1 episodes - episode_reward: -247.464 [-247.464, -247.464] - loss: 9.486 - mae: 39.476 - mean_q: 51.820 Interval 1876 (937500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4934 3 episodes - episode_reward: -91.488 [-122.034, -37.013] - loss: 11.271 - mae: 39.048 
- mean_q: 51.239 Interval 1877 (938000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4094 1 episodes - episode_reward: -164.023 [-164.023, -164.023] - loss: 9.578 - mae: 39.234 - mean_q: 51.770 Interval 1878 (938500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3291 4 episodes - episode_reward: -164.503 [-277.444, -40.592] - loss: 11.706 - mae: 38.976 - mean_q: 50.961 Interval 1879 (939000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2270 3 episodes - episode_reward: -91.393 [-145.872, -28.308] - loss: 12.491 - mae: 39.090 - mean_q: 50.846 Interval 1880 (939500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2006 2 episodes - episode_reward: 126.410 [-0.203, 253.023] - loss: 8.836 - mae: 39.024 - mean_q: 50.873 Interval 1881 (940000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0774 Interval 1882 (940500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0682 2 episodes - episode_reward: 19.449 [-107.570, 146.469] - loss: 10.466 - mae: 39.175 - mean_q: 50.411 Interval 1883 (941000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.5529 1 episodes - episode_reward: 217.345 [217.345, 217.345] - loss: 12.959 - mae: 39.440 - mean_q: 51.037 Interval 1884 (941500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.3301 Interval 1885 (942000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8074 2 episodes - episode_reward: 283.430 [275.444, 291.415] - loss: 10.213 - mae: 39.613 - mean_q: 51.019 Interval 1886 (942500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2100 1 episodes - episode_reward: -99.892 [-99.892, -99.892] - loss: 10.656 - mae: 39.824 - mean_q: 51.396 Interval 1887 (943000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.1205 1 episodes - episode_reward: -137.880 [-137.880, -137.880] - loss: 11.492 - mae: 39.206 - mean_q: 50.172 Interval 1888 (943500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.2330 1 episodes - episode_reward: 212.355 [212.355, 212.355] - loss: 12.376 - mae: 39.439 - mean_q: 50.411 Interval 1889 (944000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5704 1 episodes - episode_reward: -333.235 [-333.235, -333.235] - loss: 11.449 - mae: 39.394 - mean_q: 50.249 Interval 1890 (944500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3282 1 episodes - episode_reward: 124.034 [124.034, 124.034] - loss: 11.080 - mae: 39.823 - mean_q: 50.837 Interval 1891 (945000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.8319 2 episodes - episode_reward: 250.322 [205.767, 294.876] - loss: 8.808 - mae: 39.824 - mean_q: 50.620 Interval 1892 (945500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7168 2 episodes - episode_reward: -187.118 [-253.314, -120.921] - loss: 9.469 - mae: 39.797 - mean_q: 50.458 Interval 1893 (946000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3889 1 episodes - episode_reward: 218.109 [218.109, 218.109] - loss: 8.059 - mae: 39.828 - mean_q: 50.748 Interval 1894 (946500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2097 3 episodes - episode_reward: -27.394 [-143.521, 168.866] - loss: 9.757 - mae: 39.811 - mean_q: 50.512 Interval 1895 (947000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0362 1 episodes - episode_reward: -117.204 [-117.204, -117.204] - loss: 11.216 - mae: 39.702 - mean_q: 50.433 Interval 1896 (947500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4147 2 
episodes - episode_reward: 175.165 [173.697, 176.634] - loss: 9.898 - mae: 39.522 - mean_q: 50.792 Interval 1897 (948000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0958 Interval 1898 (948500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3611 1 episodes - episode_reward: -256.198 [-256.198, -256.198] - loss: 10.243 - mae: 39.742 - mean_q: 50.637 Interval 1899 (949000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1569 2 episodes - episode_reward: 57.197 [-107.974, 222.369] - loss: 8.383 - mae: 39.477 - mean_q: 50.091 Interval 1900 (949500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0806 Interval 1901 (950000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.4181 1 episodes - episode_reward: 157.499 [157.499, 157.499] - loss: 9.861 - mae: 39.554 - mean_q: 49.957 Interval 1902 (950500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1797 1 episodes - episode_reward: 213.258 [213.258, 213.258] - loss: 9.827 - mae: 39.220 - mean_q: 49.433 Interval 1903 (951000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2062 Interval 1904 (951500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1309 Interval 1905 (952000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2995 3 episodes - episode_reward: 33.238 [-100.000, 147.506] - loss: 10.959 - mae: 39.243 - mean_q: 49.654 Interval 1906 (952500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0522 Interval 1907 (953000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1782 Interval 1908 (953500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0587 Interval 1909 (954000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: 0.2052 1 episodes - episode_reward: -42.040 [-42.040, -42.040] - loss: 7.961 - mae: 38.821 - mean_q: 49.464 Interval 1910 (954500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0277 Interval 1911 (955000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2072 2 episodes - episode_reward: 11.927 [-100.000, 123.854] - loss: 8.274 - mae: 38.805 - mean_q: 49.016 Interval 1912 (955500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1719 1 episodes - episode_reward: 174.831 [174.831, 174.831] - loss: 9.402 - mae: 38.453 - mean_q: 49.054 Interval 1913 (956000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7013 Interval 1914 (956500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4876 3 episodes - episode_reward: -396.508 [-669.397, -212.004] - loss: 9.706 - mae: 38.669 - mean_q: 49.257 Interval 1915 (957000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2633 1 episodes - episode_reward: 197.409 [197.409, 197.409] - loss: 8.259 - mae: 38.655 - mean_q: 49.020 Interval 1916 (957500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0133 1 episodes - episode_reward: -46.171 [-46.171, -46.171] - loss: 10.645 - mae: 38.751 - mean_q: 48.796 Interval 1917 (958000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0141 Interval 1918 (958500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1823 1 episodes - episode_reward: 165.133 [165.133, 165.133] - loss: 12.241 - mae: 38.364 - mean_q: 48.757 Interval 1919 (959000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1440 Interval 1920 (959500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4129 
2 episodes - episode_reward: -40.169 [-237.887, 157.549] - loss: 11.539 - mae: 38.224 - mean_q: 47.965 Interval 1921 (960000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4902 1 episodes - episode_reward: 219.436 [219.436, 219.436] - loss: 16.665 - mae: 38.274 - mean_q: 48.265 Interval 1922 (960500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.4620 1 episodes - episode_reward: 221.119 [221.119, 221.119] - loss: 9.936 - mae: 38.384 - mean_q: 49.164 Interval 1923 (961000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6944 2 episodes - episode_reward: -204.148 [-291.602, -116.694] - loss: 12.288 - mae: 38.412 - mean_q: 49.232 Interval 1924 (961500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.6576 2 episodes - episode_reward: 142.369 [44.274, 240.463] - loss: 10.087 - mae: 38.852 - mean_q: 50.018 Interval 1925 (962000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2547 1 episodes - episode_reward: -67.416 [-67.416, -67.416] - loss: 10.607 - mae: 39.039 - mean_q: 49.832 Interval 1926 (962500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2976 2 episodes - episode_reward: -77.568 [-111.614, -43.522] - loss: 7.688 - mae: 39.249 - mean_q: 49.966 Interval 1927 (963000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.8277 2 episodes - episode_reward: 196.580 [154.696, 238.463] - loss: 8.196 - mae: 39.186 - mean_q: 50.005 Interval 1928 (963500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0375 Interval 1929 (964000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1636 2 episodes - episode_reward: 60.782 [-73.121, 194.685] - loss: 10.280 - mae: 39.056 - mean_q: 49.689 Interval 1930 (964500 steps performed) 500/500 [==============================] 
- 3s 6ms/step - reward: 0.0882 Interval 1931 (965000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0395 4 episodes - episode_reward: -112.672 [-446.394, 202.484] - loss: 8.381 - mae: 38.971 - mean_q: 49.540 Interval 1932 (965500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0644 Interval 1933 (966000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0929 4 episodes - episode_reward: -265.842 [-430.589, -127.990] - loss: 13.841 - mae: 39.203 - mean_q: 49.366 Interval 1934 (966500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1047 Interval 1935 (967000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.6117 2 episodes - episode_reward: 182.965 [165.588, 200.341] - loss: 13.202 - mae: 39.806 - mean_q: 49.820 Interval 1936 (967500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3617 1 episodes - episode_reward: -197.467 [-197.467, -197.467] - loss: 13.315 - mae: 40.433 - mean_q: 51.085 Interval 1937 (968000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0266 1 episodes - episode_reward: -65.877 [-65.877, -65.877] - loss: 16.557 - mae: 42.164 - mean_q: 53.047 Interval 1938 (968500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0409 4 episodes - episode_reward: -108.709 [-144.226, -67.158] - loss: 19.641 - mae: 44.436 - mean_q: 55.884 Interval 1939 (969000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0485 4 episodes - episode_reward: -132.893 [-312.872, 2.181] - loss: 29.409 - mae: 48.400 - mean_q: 61.435 Interval 1940 (969500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2834 6 episodes - episode_reward: -190.710 [-414.312, -3.400] - loss: 36.718 - mae: 56.392 - mean_q: 72.799 Interval 1941 (970000 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.3361 2 episodes - episode_reward: 89.705 [23.248, 156.163] - loss: 52.032 - mae: 64.735 - mean_q: 84.101 Interval 1942 (970500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4358 2 episodes - episode_reward: -110.783 [-118.907, -102.659] - loss: 54.396 - mae: 73.542 - mean_q: 96.170 Interval 1943 (971000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6735 4 episodes - episode_reward: -84.843 [-150.628, -27.019] - loss: 62.822 - mae: 81.843 - mean_q: 107.786 Interval 1944 (971500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.3585 6 episodes - episode_reward: -359.562 [-598.434, -32.367] - loss: 72.514 - mae: 91.960 - mean_q: 120.695 Interval 1945 (972000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7519 6 episodes - episode_reward: -150.931 [-429.378, -23.075] - loss: 84.021 - mae: 97.791 - mean_q: 127.590 Interval 1946 (972500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1549 Interval 1947 (973000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1054 Interval 1948 (973500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2273 2 episodes - episode_reward: 45.666 [-62.114, 153.445] - loss: 82.444 - mae: 114.046 - mean_q: 151.587 Interval 1949 (974000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0541 Interval 1950 (974500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5289 3 episodes - episode_reward: -125.176 [-226.998, -35.961] - loss: 92.367 - mae: 133.368 - mean_q: 179.188 Interval 1951 (975000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2328 Interval 1952 (975500 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -0.1171 Interval 1953 (976000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2212 Interval 1954 (976500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.3101 Interval 1955 (977000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1304 Interval 1956 (977500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -2.2986 2 episodes - episode_reward: -722.123 [-946.018, -498.228] - loss: 114.217 - mae: 183.953 - mean_q: 247.612 Interval 1957 (978000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2523 Interval 1958 (978500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0337 5 episodes - episode_reward: -330.768 [-596.841, -100.000] - loss: 182.195 - mae: 203.973 - mean_q: 275.202 Interval 1959 (979000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2877 5 episodes - episode_reward: -335.799 [-499.851, -100.000] - loss: 187.277 - mae: 205.578 - mean_q: 278.132 Interval 1960 (979500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7355 6 episodes - episode_reward: -225.683 [-367.677, -88.060] - loss: 175.415 - mae: 211.240 - mean_q: 285.144 Interval 1961 (980000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8185 3 episodes - episode_reward: -289.297 [-431.261, -130.954] - loss: 126.415 - mae: 220.958 - mean_q: 298.202 Interval 1962 (980500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3722 4 episodes - episode_reward: -295.766 [-417.419, -124.770] - loss: 272.397 - mae: 230.034 - mean_q: 310.393 Interval 1963 (981000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5561 1 episodes - episode_reward: -523.537 [-523.537, 
-523.537] - loss: 119.782 - mae: 233.025 - mean_q: 314.762 Interval 1964 (981500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0644 Interval 1965 (982000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0223 Interval 1966 (982500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0390 Interval 1967 (983000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0302 Interval 1968 (983500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0243 Interval 1969 (984000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0198 Interval 1970 (984500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.2419 1 episodes - episode_reward: 128.923 [128.923, 128.923] - loss: 141.300 - mae: 241.425 - mean_q: 327.539 Interval 1971 (985000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4346 4 episodes - episode_reward: -159.167 [-273.821, -35.445] - loss: 148.541 - mae: 245.142 - mean_q: 332.937 Interval 1972 (985500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1033 3 episodes - episode_reward: -167.988 [-206.570, -105.162] - loss: 159.616 - mae: 241.313 - mean_q: 327.650 Interval 1973 (986000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2160 Interval 1974 (986500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1471 Interval 1975 (987000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2087 Interval 1976 (987500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1539 Interval 1977 (988000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1570 Interval 1978 (988500 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -0.1979 Interval 1979 (989000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1655 Interval 1980 (989500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1951 Interval 1981 (990000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1791 Interval 1982 (990500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1711 Interval 1983 (991000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1769 Interval 1984 (991500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1588 Interval 1985 (992000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1960 Interval 1986 (992500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2112 Interval 1987 (993000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1628 Interval 1988 (993500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2027 Interval 1989 (994000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1369 Interval 1990 (994500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.2759 Interval 1991 (995000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -1.0376 Interval 1992 (995500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3148 2 episodes - episode_reward: -1413.120 [-2300.753, -525.488] - loss: 94.947 - mae: 258.191 - mean_q: 347.156 Interval 1993 (996000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9859 Interval 1994 (996500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0557 2 episodes - 
episode_reward: -557.411 [-682.734, -432.089] - loss: 66.947 - mae: 248.026 - mean_q: 333.142 Interval 1995 (997000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6309 1 episodes - episode_reward: -587.612 [-587.612, -587.612] - loss: 66.343 - mae: 244.696 - mean_q: 328.484 Interval 1996 (997500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9329 Interval 1997 (998000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3244 1 episodes - episode_reward: -631.372 [-631.372, -631.372] - loss: 64.146 - mae: 238.325 - mean_q: 319.668 Interval 1998 (998500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5849 Interval 1999 (999000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9451 1 episodes - episode_reward: -1145.149 [-1145.149, -1145.149] - loss: 74.943 - mae: 233.883 - mean_q: 313.246 Interval 2000 (999500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2086 1 episodes - episode_reward: -630.210 [-630.210, -630.210] - loss: 64.558 - mae: 231.430 - mean_q: 309.629 Interval 2001 (1000000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1870 1 episodes - episode_reward: -882.133 [-882.133, -882.133] - loss: 79.113 - mae: 226.236 - mean_q: 302.844 Interval 2002 (1000500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1273 1 episodes - episode_reward: -352.438 [-352.438, -352.438] - loss: 60.629 - mae: 221.513 - mean_q: 296.163 Interval 2003 (1001000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5464 2 episodes - episode_reward: -452.637 [-560.879, -344.395] - loss: 63.362 - mae: 216.286 - mean_q: 288.402 Interval 2004 (1001500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8408 2 episodes - episode_reward: 
-185.342 [-280.114, -90.571] - loss: 81.959 - mae: 212.510 - mean_q: 283.729 Interval 2005 (1002000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6002 2 episodes - episode_reward: -448.449 [-656.881, -240.017] - loss: 51.407 - mae: 206.726 - mean_q: 275.732 Interval 2006 (1002500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3046 2 episodes - episode_reward: -305.855 [-367.141, -244.569] - loss: 75.589 - mae: 204.281 - mean_q: 272.359 Interval 2007 (1003000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9451 1 episodes - episode_reward: -288.319 [-288.319, -288.319] - loss: 71.823 - mae: 196.752 - mean_q: 261.913 Interval 2008 (1003500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1071 2 episodes - episode_reward: -232.235 [-432.704, -31.765] - loss: 79.115 - mae: 193.424 - mean_q: 257.316 Interval 2009 (1004000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0378 1 episodes - episode_reward: -775.938 [-775.938, -775.938] - loss: 77.911 - mae: 189.346 - mean_q: 251.876 Interval 2010 (1004500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1221 2 episodes - episode_reward: -292.295 [-387.474, -197.117] - loss: 83.346 - mae: 186.041 - mean_q: 247.753 Interval 2011 (1005000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9687 Interval 2012 (1005500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2459 2 episodes - episode_reward: -597.020 [-750.354, -443.686] - loss: 60.008 - mae: 183.526 - mean_q: 243.701 Interval 2013 (1006000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9060 1 episodes - episode_reward: -349.306 [-349.306, -349.306] - loss: 66.741 - mae: 182.722 - mean_q: 241.786 Interval 2014 (1006500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.9936 1 episodes - episode_reward: -471.138 [-471.138, -471.138] - loss: 60.627 - mae: 177.960 - mean_q: 234.845 Interval 2015 (1007000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7005 1 episodes - episode_reward: -323.903 [-323.903, -323.903] - loss: 64.355 - mae: 178.639 - mean_q: 235.835 Interval 2016 (1007500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8239 1 episodes - episode_reward: -404.398 [-404.398, -404.398] - loss: 62.361 - mae: 177.615 - mean_q: 234.532 Interval 2017 (1008000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4290 2 episodes - episode_reward: -105.749 [-220.106, 8.607] - loss: 67.561 - mae: 176.887 - mean_q: 233.730 Interval 2018 (1008500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8640 1 episodes - episode_reward: -369.604 [-369.604, -369.604] - loss: 71.737 - mae: 178.238 - mean_q: 235.899 Interval 2019 (1009000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0003 4 episodes - episode_reward: -290.063 [-437.719, -156.083] - loss: 78.725 - mae: 176.428 - mean_q: 232.541 Interval 2020 (1009500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9353 2 episodes - episode_reward: -271.578 [-404.014, -139.142] - loss: 73.557 - mae: 176.580 - mean_q: 232.164 Interval 2021 (1010000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2641 Interval 2022 (1010500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1289 Interval 2023 (1011000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5021 Interval 2024 (1011500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2448 2 episodes - episode_reward: -233.428 [-345.415, -121.441] 
- loss: 62.779 - mae: 173.010 - mean_q: 227.893 Interval 2025 (1012000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1755 Interval 2026 (1012500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8509 2 episodes - episode_reward: -248.499 [-268.206, -228.793] - loss: 62.155 - mae: 171.547 - mean_q: 226.255 Interval 2027 (1013000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4560 2 episodes - episode_reward: -62.269 [-63.749, -60.789] - loss: 51.761 - mae: 169.687 - mean_q: 224.058 Interval 2028 (1013500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3485 Interval 2029 (1014000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2892 1 episodes - episode_reward: -372.279 [-372.279, -372.279] - loss: 79.342 - mae: 167.821 - mean_q: 221.722 Interval 2030 (1014500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4804 Interval 2031 (1015000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6416 1 episodes - episode_reward: -489.108 [-489.108, -489.108] - loss: 64.765 - mae: 166.208 - mean_q: 219.943 Interval 2032 (1015500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1082 2 episodes - episode_reward: -88.423 [-105.104, -71.742] - loss: 59.640 - mae: 165.572 - mean_q: 219.101 Interval 2033 (1016000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1579 1 episodes - episode_reward: -75.002 [-75.002, -75.002] - loss: 71.226 - mae: 165.048 - mean_q: 218.670 Interval 2034 (1016500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3446 Interval 2035 (1017000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8695 1 episodes - episode_reward: -423.513 [-423.513, -423.513] - loss: 63.319 
- mae: 162.285 - mean_q: 214.179 Interval 2036 (1017500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7760 4 episodes - episode_reward: -287.830 [-399.070, -141.251] - loss: 62.075 - mae: 159.445 - mean_q: 210.335 Interval 2037 (1018000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7617 1 episodes - episode_reward: -230.536 [-230.536, -230.536] - loss: 77.136 - mae: 156.414 - mean_q: 205.933 Interval 2038 (1018500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1130 1 episodes - episode_reward: -617.835 [-617.835, -617.835] - loss: 59.558 - mae: 155.168 - mean_q: 204.993 Interval 2039 (1019000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1051 Interval 2040 (1019500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3025 Interval 2041 (1020000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1618 Interval 2042 (1020500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6220 1 episodes - episode_reward: -610.957 [-610.957, -610.957] - loss: 53.221 - mae: 151.281 - mean_q: 200.174 Interval 2043 (1021000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3519 3 episodes - episode_reward: -207.316 [-312.416, -53.244] - loss: 59.750 - mae: 147.836 - mean_q: 195.263 Interval 2044 (1021500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6473 4 episodes - episode_reward: -222.988 [-350.364, -128.597] - loss: 53.722 - mae: 146.079 - mean_q: 192.695 Interval 2045 (1022000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3220 3 episodes - episode_reward: -187.542 [-234.245, -157.228] - loss: 50.281 - mae: 142.660 - mean_q: 188.047 Interval 2046 (1022500 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: -0.6198 1 episodes - episode_reward: -321.070 [-321.070, -321.070] - loss: 53.717 - mae: 138.708 - mean_q: 182.289 Interval 2047 (1023000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6768 Interval 2048 (1023500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4333 4 episodes - episode_reward: -304.494 [-510.576, -48.743] - loss: 51.309 - mae: 135.518 - mean_q: 177.326 Interval 2049 (1024000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4925 1 episodes - episode_reward: -23.365 [-23.365, -23.365] - loss: 47.305 - mae: 132.042 - mean_q: 172.219 Interval 2050 (1024500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2263 Interval 2051 (1025000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5632 1 episodes - episode_reward: -411.430 [-411.430, -411.430] - loss: 47.095 - mae: 128.369 - mean_q: 167.388 Interval 2052 (1025500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6951 4 episodes - episode_reward: -218.264 [-412.235, -108.534] - loss: 48.776 - mae: 125.937 - mean_q: 163.090 Interval 2053 (1026000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3412 Interval 2054 (1026500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3398 3 episodes - episode_reward: -302.173 [-521.096, -21.449] - loss: 42.067 - mae: 123.600 - mean_q: 160.295 Interval 2055 (1027000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8018 3 episodes - episode_reward: -112.752 [-160.646, -72.104] - loss: 44.130 - mae: 120.845 - mean_q: 155.300 Interval 2056 (1027500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0115 4 episodes - episode_reward: -143.941 [-249.128, -5.701] - loss: 40.393 - mae: 120.651 - mean_q: 
154.805 Interval 2057 (1028000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2774 3 episodes - episode_reward: -50.241 [-100.000, -1.753] - loss: 45.293 - mae: 118.977 - mean_q: 152.152 Interval 2058 (1028500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8639 3 episodes - episode_reward: -111.004 [-242.023, -31.305] - loss: 42.440 - mae: 116.654 - mean_q: 149.220 Interval 2059 (1029000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8492 3 episodes - episode_reward: -158.435 [-219.830, -82.868] - loss: 40.538 - mae: 115.633 - mean_q: 147.635 Interval 2060 (1029500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4600 3 episodes - episode_reward: -88.116 [-166.520, -26.471] - loss: 33.029 - mae: 114.624 - mean_q: 146.756 Interval 2061 (1030000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9394 5 episodes - episode_reward: -193.691 [-373.384, -116.345] - loss: 38.994 - mae: 114.070 - mean_q: 145.820 Interval 2062 (1030500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0165 1 episodes - episode_reward: -77.752 [-77.752, -77.752] - loss: 34.110 - mae: 112.804 - mean_q: 143.844 Interval 2063 (1031000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2608 4 episodes - episode_reward: -248.869 [-541.090, -54.668] - loss: 35.593 - mae: 110.699 - mean_q: 141.380 Interval 2064 (1031500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8206 4 episodes - episode_reward: -127.876 [-176.036, -29.210] - loss: 36.064 - mae: 109.993 - mean_q: 139.845 Interval 2065 (1032000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6869 3 episodes - episode_reward: -103.460 [-208.867, -17.012] - loss: 33.646 - mae: 108.699 - mean_q: 137.760 Interval 2066 (1032500 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8548 2 episodes - episode_reward: -175.419 [-232.396, -118.442] - loss: 33.321 - mae: 106.595 - mean_q: 134.430 Interval 2067 (1033000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3461 1 episodes - episode_reward: -305.303 [-305.303, -305.303] - loss: 30.212 - mae: 103.907 - mean_q: 130.443 Interval 2068 (1033500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0873 2 episodes - episode_reward: 17.239 [-128.976, 163.455] - loss: 29.791 - mae: 101.848 - mean_q: 127.302 Interval 2069 (1034000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2204 4 episodes - episode_reward: -282.991 [-337.033, -137.424] - loss: 33.273 - mae: 99.653 - mean_q: 123.395 Interval 2070 (1034500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2486 2 episodes - episode_reward: -79.577 [-94.807, -64.347] - loss: 35.065 - mae: 96.120 - mean_q: 117.669 Interval 2071 (1035000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3280 2 episodes - episode_reward: -274.670 [-279.984, -269.356] - loss: 40.543 - mae: 94.371 - mean_q: 115.510 Interval 2072 (1035500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0101 3 episodes - episode_reward: -195.942 [-351.910, -56.669] - loss: 33.028 - mae: 93.069 - mean_q: 112.802 Interval 2073 (1036000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3592 1 episodes - episode_reward: -109.903 [-109.903, -109.903] - loss: 33.262 - mae: 90.919 - mean_q: 109.440 Interval 2074 (1036500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7447 2 episodes - episode_reward: -220.015 [-255.284, -184.747] - loss: 38.354 - mae: 89.228 - mean_q: 107.390 Interval 2075 (1037000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -1.0390 2 episodes - episode_reward: -248.399 [-255.493, -241.304] - loss: 33.469 - mae: 87.809 - mean_q: 105.453 Interval 2076 (1037500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2098 3 episodes - episode_reward: -201.443 [-273.811, -118.512] - loss: 35.509 - mae: 85.953 - mean_q: 102.877 Interval 2077 (1038000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3021 1 episodes - episode_reward: -153.463 [-153.463, -153.463] - loss: 34.381 - mae: 86.039 - mean_q: 102.656 Interval 2078 (1038500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0776 3 episodes - episode_reward: -167.268 [-201.886, -116.602] - loss: 46.620 - mae: 84.730 - mean_q: 100.653 Interval 2079 (1039000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0495 3 episodes - episode_reward: -196.391 [-385.141, -27.415] - loss: 49.399 - mae: 83.246 - mean_q: 99.558 Interval 2080 (1039500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4903 2 episodes - episode_reward: -119.213 [-182.300, -56.126] - loss: 40.561 - mae: 82.428 - mean_q: 97.325 Interval 2081 (1040000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0555 2 episodes - episode_reward: -267.986 [-457.799, -78.174] - loss: 39.764 - mae: 82.760 - mean_q: 98.086 Interval 2082 (1040500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1867 Interval 2083 (1041000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0094 Interval 2084 (1041500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2694 3 episodes - episode_reward: -261.139 [-430.784, -68.867] - loss: 37.241 - mae: 80.386 - mean_q: 95.279 Interval 2085 (1042000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.1095 Interval 2086 (1042500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.1211 1 episodes - episode_reward: 101.033 [101.033, 101.033] - loss: 39.268 - mae: 79.684 - mean_q: 94.917 Interval 2087 (1043000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3982 2 episodes - episode_reward: -385.921 [-475.915, -295.926] - loss: 40.378 - mae: 78.467 - mean_q: 94.104 Interval 2088 (1043500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8519 3 episodes - episode_reward: -239.744 [-419.903, -122.973] - loss: 42.599 - mae: 78.713 - mean_q: 94.034 Interval 2089 (1044000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5856 1 episodes - episode_reward: -481.849 [-481.849, -481.849] - loss: 39.885 - mae: 77.962 - mean_q: 93.117 Interval 2090 (1044500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0530 Interval 2091 (1045000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0805 Interval 2092 (1045500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.2183 1 episodes - episode_reward: 63.483 [63.483, 63.483] - loss: 42.712 - mae: 80.635 - mean_q: 95.653 Interval 2093 (1046000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7883 3 episodes - episode_reward: -466.201 [-608.253, -230.296] - loss: 41.480 - mae: 82.131 - mean_q: 97.647 Interval 2094 (1046500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0261 1 episodes - episode_reward: -54.696 [-54.696, -54.696] - loss: 43.997 - mae: 84.123 - mean_q: 99.096 Interval 2095 (1047000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2561 Interval 2096 (1047500 steps performed) 500/500 [==============================] - 
3s 6ms/step - reward: -0.3367 Interval 2097 (1048000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0636 Interval 2098 (1048500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3621 Interval 2099 (1049000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1030 Interval 2100 (1049500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1716 Interval 2101 (1050000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.6477 Interval 2102 (1050500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3262 1 episodes - episode_reward: -1249.802 [-1249.802, -1249.802] - loss: 48.208 - mae: 85.591 - mean_q: 102.201 Interval 2103 (1051000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5337 1 episodes - episode_reward: -468.080 [-468.080, -468.080] - loss: 38.910 - mae: 85.397 - mean_q: 101.834 Interval 2104 (1051500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2629 Interval 2105 (1052000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1092 Interval 2106 (1052500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.3159 Interval 2107 (1053000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: 0.0998 Interval 2108 (1053500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.6720 1 episodes - episode_reward: -715.106 [-715.106, -715.106] - loss: 46.930 - mae: 84.713 - mean_q: 101.421 Interval 2109 (1054000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4025 2 episodes - episode_reward: -101.003 [-122.416, -79.590] - loss: 37.876 - mae: 81.781 - mean_q: 97.390 Interval 2110 (1054500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -1.4987 3 episodes - episode_reward: -260.668 [-391.116, -174.302] - loss: 34.610 - mae: 82.127 - mean_q: 97.706 Interval 2111 (1055000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4061 2 episodes - episode_reward: -119.224 [-126.803, -111.646] - loss: 37.257 - mae: 80.049 - mean_q: 94.997 Interval 2112 (1055500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7609 1 episodes - episode_reward: -152.314 [-152.314, -152.314] - loss: 36.398 - mae: 78.699 - mean_q: 92.648 Interval 2113 (1056000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7053 2 episodes - episode_reward: -268.606 [-388.567, -148.646] - loss: 39.449 - mae: 78.732 - mean_q: 92.528 Interval 2114 (1056500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0014 Interval 2115 (1057000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2021 Interval 2116 (1057500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8103 1 episodes - episode_reward: -488.469 [-488.469, -488.469] - loss: 33.132 - mae: 77.223 - mean_q: 89.850 Interval 2117 (1058000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1969 2 episodes - episode_reward: -330.805 [-388.924, -272.685] - loss: 34.883 - mae: 75.923 - mean_q: 88.536 Interval 2118 (1058500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0697 1 episodes - episode_reward: 67.385 [67.385, 67.385] - loss: 35.582 - mae: 75.001 - mean_q: 86.582 Interval 2119 (1059000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6551 1 episodes - episode_reward: -250.209 [-250.209, -250.209] - loss: 43.218 - mae: 75.167 - mean_q: 86.351 Interval 2120 (1059500 steps performed) 500/500 [==============================] - 
3s 5ms/step - reward: -1.6469 3 episodes - episode_reward: -279.650 [-353.785, -227.422] - loss: 38.191 - mae: 74.707 - mean_q: 85.617 Interval 2121 (1060000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9804 3 episodes - episode_reward: -332.777 [-477.960, -199.553] - loss: 46.743 - mae: 74.961 - mean_q: 85.265 Interval 2122 (1060500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9686 3 episodes - episode_reward: -205.323 [-393.930, -100.000] - loss: 40.628 - mae: 73.413 - mean_q: 82.351 Interval 2123 (1061000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5520 Interval 2124 (1061500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5284 2 episodes - episode_reward: -297.256 [-436.360, -158.152] - loss: 33.683 - mae: 70.515 - mean_q: 78.300 Interval 2125 (1062000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8451 2 episodes - episode_reward: -376.799 [-552.611, -200.986] - loss: 40.380 - mae: 70.265 - mean_q: 77.901 Interval 2126 (1062500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9911 3 episodes - episode_reward: -326.646 [-529.858, -144.083] - loss: 35.469 - mae: 70.788 - mean_q: 77.925 Interval 2127 (1063000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9243 4 episodes - episode_reward: -362.772 [-420.861, -220.983] - loss: 33.875 - mae: 70.826 - mean_q: 78.134 Interval 2128 (1063500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0033 2 episodes - episode_reward: -259.165 [-298.246, -220.085] - loss: 39.433 - mae: 71.542 - mean_q: 77.610 Interval 2129 (1064000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5190 4 episodes - episode_reward: -222.935 [-275.157, -173.885] - loss: 37.101 - mae: 72.317 - mean_q: 78.006 
Interval 2130 (1064500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7496 1 episodes - episode_reward: -145.448 [-145.448, -145.448] - loss: 37.161 - mae: 73.159 - mean_q: 79.331 Interval 2131 (1065000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2579 1 episodes - episode_reward: -861.572 [-861.572, -861.572] - loss: 37.480 - mae: 73.798 - mean_q: 78.955 Interval 2132 (1065500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6505 2 episodes - episode_reward: -562.303 [-826.133, -298.474] - loss: 35.778 - mae: 74.862 - mean_q: 79.335 Interval 2133 (1066000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1614 2 episodes - episode_reward: -407.929 [-637.381, -178.477] - loss: 42.737 - mae: 74.224 - mean_q: 78.236 Interval 2134 (1066500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6498 2 episodes - episode_reward: -414.180 [-642.701, -185.658] - loss: 37.385 - mae: 73.918 - mean_q: 76.571 Interval 2135 (1067000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5145 3 episodes - episode_reward: -386.716 [-782.399, -104.091] - loss: 33.721 - mae: 73.902 - mean_q: 75.861 Interval 2136 (1067500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.1012 4 episodes - episode_reward: -524.015 [-882.602, -121.274] - loss: 33.802 - mae: 74.414 - mean_q: 76.282 Interval 2137 (1068000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4681 1 episodes - episode_reward: -233.342 [-233.342, -233.342] - loss: 35.362 - mae: 74.443 - mean_q: 73.722 Interval 2138 (1068500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0961 2 episodes - episode_reward: -252.471 [-360.593, -144.349] - loss: 36.365 - mae: 73.573 - mean_q: 72.690 Interval 2139 (1069000 steps 
performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4569 1 episodes - episode_reward: -1037.084 [-1037.084, -1037.084] - loss: 38.528 - mae: 75.269 - mean_q: 75.177 Interval 2140 (1069500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5211 2 episodes - episode_reward: -452.193 [-537.767, -366.619] - loss: 48.596 - mae: 74.521 - mean_q: 74.776 Interval 2141 (1070000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3950 3 episodes - episode_reward: -543.398 [-988.626, -296.457] - loss: 58.214 - mae: 75.245 - mean_q: 75.859 Interval 2142 (1070500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3510 6 episodes - episode_reward: -290.197 [-440.123, -150.448] - loss: 45.498 - mae: 74.790 - mean_q: 75.429 Interval 2143 (1071000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0667 2 episodes - episode_reward: -265.944 [-403.703, -128.186] - loss: 40.129 - mae: 75.003 - mean_q: 75.246 Interval 2144 (1071500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1126 3 episodes - episode_reward: -317.328 [-438.438, -188.609] - loss: 41.199 - mae: 75.693 - mean_q: 75.026 Interval 2145 (1072000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4522 5 episodes - episode_reward: -289.440 [-581.436, -129.476] - loss: 41.621 - mae: 75.872 - mean_q: 75.054 Interval 2146 (1072500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2385 2 episodes - episode_reward: -267.530 [-317.910, -217.150] - loss: 50.196 - mae: 76.984 - mean_q: 75.802 Interval 2147 (1073000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5908 3 episodes - episode_reward: -232.674 [-291.522, -157.632] - loss: 44.797 - mae: 76.252 - mean_q: 75.598 Interval 2148 (1073500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.3181 1 episodes - episode_reward: -190.352 [-190.352, -190.352] - loss: 49.671 - mae: 76.347 - mean_q: 75.635 Interval 2149 (1074000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2440 4 episodes - episode_reward: -296.725 [-468.296, -167.918] - loss: 44.711 - mae: 77.571 - mean_q: 77.220 Interval 2150 (1074500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9045 5 episodes - episode_reward: -291.249 [-510.636, -159.413] - loss: 51.320 - mae: 76.498 - mean_q: 75.158 Interval 2151 (1075000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5760 3 episodes - episode_reward: -358.411 [-780.749, -124.312] - loss: 48.652 - mae: 76.634 - mean_q: 75.494 Interval 2152 (1075500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3680 6 episodes - episode_reward: -317.538 [-556.705, -120.310] - loss: 69.884 - mae: 77.188 - mean_q: 75.409 Interval 2153 (1076000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4929 1 episodes - episode_reward: -69.213 [-69.213, -69.213] - loss: 53.413 - mae: 78.132 - mean_q: 76.902 Interval 2154 (1076500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4687 1 episodes - episode_reward: -372.706 [-372.706, -372.706] - loss: 53.980 - mae: 78.303 - mean_q: 77.476 Interval 2155 (1077000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2426 Interval 2156 (1077500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3042 Interval 2157 (1078000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.0518 Interval 2158 (1078500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.3132 Interval 2159 (1079000 steps performed) 500/500 
[==============================] - 6s 11ms/step - reward: -0.1387 Interval 2160 (1079500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2554 Interval 2161 (1080000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7845 3 episodes - episode_reward: -345.649 [-861.949, -87.185] - loss: 75.841 - mae: 100.004 - mean_q: 116.849 Interval 2162 (1080500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5144 2 episodes - episode_reward: -197.499 [-213.423, -181.574] - loss: 87.140 - mae: 103.474 - mean_q: 122.998 Interval 2163 (1081000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3604 Interval 2164 (1081500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0103 3 episodes - episode_reward: -193.192 [-284.613, -118.526] - loss: 76.266 - mae: 108.133 - mean_q: 130.545 Interval 2165 (1082000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0912 Interval 2166 (1082500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1328 Interval 2167 (1083000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2962 Interval 2168 (1083500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0052 Interval 2169 (1084000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5033 1 episodes - episode_reward: -618.325 [-618.325, -618.325] - loss: 89.973 - mae: 110.941 - mean_q: 136.636 Interval 2170 (1084500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4908 Interval 2171 (1085000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1200 Interval 2172 (1085500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2782 Interval 2173 (1086000 steps 
performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2192 4 episodes - episode_reward: -215.298 [-441.681, -100.000] - loss: 87.799 - mae: 109.368 - mean_q: 134.310 Interval 2174 (1086500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4082 1 episodes - episode_reward: -152.647 [-152.647, -152.647] - loss: 75.814 - mae: 109.814 - mean_q: 133.789 Interval 2175 (1087000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4085 1 episodes - episode_reward: -303.241 [-303.241, -303.241] - loss: 81.311 - mae: 110.821 - mean_q: 135.562 Interval 2176 (1087500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3348 2 episodes - episode_reward: -97.029 [-103.953, -90.105] - loss: 85.555 - mae: 111.047 - mean_q: 135.868 Interval 2177 (1088000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2247 1 episodes - episode_reward: -129.966 [-129.966, -129.966] - loss: 73.291 - mae: 111.760 - mean_q: 136.330 Interval 2178 (1088500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2029 Interval 2179 (1089000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4329 1 episodes - episode_reward: -342.060 [-342.060, -342.060] - loss: 72.768 - mae: 116.871 - mean_q: 142.476 Interval 2180 (1089500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2135 Interval 2181 (1090000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2774 Interval 2182 (1090500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1096 Interval 2183 (1091000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1827 Interval 2184 (1091500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1674 Interval 2185 (1092000 steps 
performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2372 Interval 2186 (1092500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1578 Interval 2187 (1093000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2324 Interval 2188 (1093500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2360 Interval 2189 (1094000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.0568 Interval 2190 (1094500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.2510 Interval 2191 (1095000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2584 Interval 2192 (1095500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1992 Interval 2193 (1096000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4936 1 episodes - episode_reward: -1423.195 [-1423.195, -1423.195] - loss: 44.579 - mae: 121.254 - mean_q: 150.775 Interval 2194 (1096500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5210 2 episodes - episode_reward: -337.192 [-372.541, -301.844] - loss: 47.841 - mae: 120.508 - mean_q: 149.569 Interval 2195 (1097000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8151 2 episodes - episode_reward: -268.368 [-282.618, -254.119] - loss: 53.983 - mae: 119.685 - mean_q: 148.729 Interval 2196 (1097500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7573 2 episodes - episode_reward: -174.266 [-199.433, -149.100] - loss: 45.061 - mae: 118.305 - mean_q: 146.667 Interval 2197 (1098000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9146 1 episodes - episode_reward: -201.358 [-201.358, -201.358] - loss: 55.267 - mae: 115.279 - mean_q: 142.948 Interval 2198 
(1098500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0383 1 episodes - episode_reward: -275.502 [-275.502, -275.502] - loss: 63.788 - mae: 113.616 - mean_q: 139.675 Interval 2199 (1099000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4915 2 episodes - episode_reward: -109.404 [-152.975, -65.833] - loss: 61.331 - mae: 110.999 - mean_q: 136.096 Interval 2200 (1099500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1521 Interval 2201 (1100000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0232 2 episodes - episode_reward: -259.103 [-261.529, -256.676] - loss: 46.632 - mae: 107.659 - mean_q: 131.294 Interval 2202 (1100500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1177 1 episodes - episode_reward: -488.108 [-488.108, -488.108] - loss: 54.842 - mae: 105.801 - mean_q: 127.181 Interval 2203 (1101000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0864 Interval 2204 (1101500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6707 Interval 2205 (1102000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0046 1 episodes - episode_reward: -827.255 [-827.255, -827.255] - loss: 46.754 - mae: 102.835 - mean_q: 122.099 Interval 2206 (1102500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6997 1 episodes - episode_reward: -878.566 [-878.566, -878.566] - loss: 56.936 - mae: 101.609 - mean_q: 120.462 Interval 2207 (1103000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9528 1 episodes - episode_reward: -618.567 [-618.567, -618.567] - loss: 78.851 - mae: 99.727 - mean_q: 117.235 Interval 2208 (1103500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6057 1 episodes - 
episode_reward: -190.970 [-190.970, -190.970] - loss: 50.420 - mae: 99.473 - mean_q: 117.071 Interval 2209 (1104000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7992 Interval 2210 (1104500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1659 Interval 2211 (1105000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6588 3 episodes - episode_reward: -506.358 [-927.015, -230.196] - loss: 48.958 - mae: 96.586 - mean_q: 112.034 Interval 2212 (1105500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3264 Interval 2213 (1106000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4803 Interval 2214 (1106500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.2076 Interval 2215 (1107000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2401 Interval 2216 (1107500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3513 5 episodes - episode_reward: -204.785 [-574.067, -101.549] - loss: 46.430 - mae: 93.658 - mean_q: 108.487 Interval 2217 (1108000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.0413 2 episodes - episode_reward: -1025.390 [-1546.223, -504.557] - loss: 62.933 - mae: 92.336 - mean_q: 104.702 Interval 2218 (1108500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4814 1 episodes - episode_reward: -192.743 [-192.743, -192.743] - loss: 46.652 - mae: 92.662 - mean_q: 107.093 Interval 2219 (1109000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1734 3 episodes - episode_reward: -208.341 [-293.080, -134.260] - loss: 65.446 - mae: 93.557 - mean_q: 108.151 Interval 2220 (1109500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9127 1 episodes - 
episode_reward: -386.926 [-386.926, -386.926] - loss: 63.777 - mae: 90.799 - mean_q: 105.995 Interval 2221 (1110000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7621 1 episodes - episode_reward: -363.766 [-363.766, -363.766] - loss: 70.168 - mae: 91.452 - mean_q: 105.796 Interval 2222 (1110500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8905 4 episodes - episode_reward: -379.124 [-794.090, -117.238] - loss: 45.512 - mae: 89.753 - mean_q: 104.574 Interval 2223 (1111000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4609 1 episodes - episode_reward: -251.151 [-251.151, -251.151] - loss: 42.191 - mae: 88.580 - mean_q: 102.448 Interval 2224 (1111500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8480 2 episodes - episode_reward: -388.556 [-417.340, -359.772] - loss: 46.006 - mae: 88.453 - mean_q: 102.303 Interval 2225 (1112000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9530 2 episodes - episode_reward: -307.144 [-353.030, -261.259] - loss: 42.730 - mae: 88.616 - mean_q: 100.974 Interval 2226 (1112500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7516 3 episodes - episode_reward: -258.334 [-385.778, -147.405] - loss: 43.607 - mae: 88.896 - mean_q: 101.742 Interval 2227 (1113000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3043 2 episodes - episode_reward: -288.856 [-324.896, -252.816] - loss: 53.199 - mae: 88.839 - mean_q: 100.852 Interval 2228 (1113500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6575 3 episodes - episode_reward: -371.367 [-493.619, -248.050] - loss: 72.069 - mae: 89.611 - mean_q: 101.033 Interval 2229 (1114000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3133 1 episodes - episode_reward: -585.169 
[-585.169, -585.169] - loss: 48.280 - mae: 89.283 - mean_q: 102.519 Interval 2230 (1114500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2389 2 episodes - episode_reward: -234.663 [-248.630, -220.695] - loss: 47.850 - mae: 89.265 - mean_q: 101.949 Interval 2231 (1115000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.0445 2 episodes - episode_reward: -265.277 [-395.340, -135.214] - loss: 42.378 - mae: 88.229 - mean_q: 99.002 Interval 2232 (1115500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.4936 1 episodes - episode_reward: -322.545 [-322.545, -322.545] - loss: 54.540 - mae: 88.020 - mean_q: 99.327 Interval 2233 (1116000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.5388 1 episodes - episode_reward: -212.442 [-212.442, -212.442] - loss: 41.088 - mae: 88.135 - mean_q: 98.466 Interval 2234 (1116500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7706 1 episodes - episode_reward: -253.232 [-253.232, -253.232] - loss: 55.432 - mae: 88.251 - mean_q: 98.502 Interval 2235 (1117000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2263 2 episodes - episode_reward: -396.559 [-419.782, -373.335] - loss: 48.572 - mae: 88.311 - mean_q: 98.587 Interval 2236 (1117500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1194 3 episodes - episode_reward: -326.314 [-500.974, -129.227] - loss: 48.791 - mae: 90.060 - mean_q: 101.579 Interval 2237 (1118000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5724 2 episodes - episode_reward: -173.101 [-233.920, -112.282] - loss: 56.714 - mae: 90.986 - mean_q: 102.591 Interval 2238 (1118500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4361 1 episodes - episode_reward: -300.496 [-300.496, -300.496] - loss: 
50.760 - mae: 91.285 - mean_q: 101.811 Interval 2239 (1119000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.4435 2 episodes - episode_reward: -1364.294 [-2327.856, -400.732] - loss: 60.059 - mae: 91.457 - mean_q: 102.744 Interval 2240 (1119500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7955 4 episodes - episode_reward: -281.699 [-377.573, -234.000] - loss: 63.045 - mae: 92.506 - mean_q: 102.743 Interval 2241 (1120000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9727 2 episodes - episode_reward: -605.007 [-821.919, -388.094] - loss: 48.953 - mae: 91.171 - mean_q: 101.263 Interval 2242 (1120500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4805 1 episodes - episode_reward: -321.102 [-321.102, -321.102] - loss: 63.116 - mae: 90.470 - mean_q: 102.349 Interval 2243 (1121000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4824 4 episodes - episode_reward: -286.860 [-735.762, 26.803] - loss: 48.632 - mae: 91.296 - mean_q: 102.519 Interval 2244 (1121500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4071 1 episodes - episode_reward: -1005.737 [-1005.737, -1005.737] - loss: 47.619 - mae: 90.749 - mean_q: 102.563 Interval 2245 (1122000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.2401 2 episodes - episode_reward: -1181.569 [-1944.531, -418.607] - loss: 56.629 - mae: 91.330 - mean_q: 102.432 Interval 2246 (1122500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1236 5 episodes - episode_reward: -319.309 [-543.121, -88.809] - loss: 46.369 - mae: 93.694 - mean_q: 105.164 Interval 2247 (1123000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4995 2 episodes - episode_reward: -364.062 [-660.054, -68.069] - loss: 55.127 - mae: 96.456 - 
mean_q: 109.594 Interval 2248 (1123500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7238 2 episodes - episode_reward: -389.064 [-656.851, -121.278] - loss: 60.867 - mae: 99.976 - mean_q: 112.279 Interval 2249 (1124000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0528 3 episodes - episode_reward: -340.319 [-537.399, -145.130] - loss: 50.799 - mae: 101.429 - mean_q: 115.175 Interval 2250 (1124500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3997 4 episodes - episode_reward: -280.246 [-427.551, -100.000] - loss: 44.957 - mae: 104.787 - mean_q: 121.240 Interval 2251 (1125000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5308 5 episodes - episode_reward: -268.152 [-345.913, -213.684] - loss: 52.784 - mae: 105.753 - mean_q: 121.547 Interval 2252 (1125500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3051 2 episodes - episode_reward: -328.067 [-490.564, -165.571] - loss: 52.178 - mae: 108.875 - mean_q: 126.606 Interval 2253 (1126000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3172 2 episodes - episode_reward: -229.086 [-317.496, -140.675] - loss: 70.450 - mae: 113.457 - mean_q: 135.294 Interval 2254 (1126500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8305 3 episodes - episode_reward: -408.226 [-629.767, -174.081] - loss: 50.786 - mae: 115.126 - mean_q: 137.514 Interval 2255 (1127000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9272 2 episodes - episode_reward: -407.120 [-451.259, -362.980] - loss: 45.952 - mae: 117.253 - mean_q: 140.722 Interval 2256 (1127500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3975 2 episodes - episode_reward: -289.965 [-476.386, -103.544] - loss: 54.284 - mae: 119.436 - mean_q: 142.818 
Interval 2257 (1128000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9844 2 episodes - episode_reward: -606.251 [-728.406, -484.096] - loss: 53.530 - mae: 120.613 - mean_q: 143.512 Interval 2258 (1128500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2669 3 episodes - episode_reward: -380.439 [-586.678, -100.000] - loss: 57.309 - mae: 122.286 - mean_q: 145.772 Interval 2259 (1129000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8117 3 episodes - episode_reward: -282.475 [-421.922, -153.622] - loss: 55.595 - mae: 123.363 - mean_q: 145.822 Interval 2260 (1129500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1293 2 episodes - episode_reward: -503.967 [-548.605, -459.329] - loss: 70.000 - mae: 121.825 - mean_q: 143.436 Interval 2261 (1130000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1773 4 episodes - episode_reward: -298.637 [-563.554, -83.576] - loss: 60.095 - mae: 121.509 - mean_q: 143.690 Interval 2262 (1130500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6230 2 episodes - episode_reward: -365.577 [-366.876, -364.278] - loss: 70.107 - mae: 120.953 - mean_q: 142.544 Interval 2263 (1131000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9846 2 episodes - episode_reward: -537.418 [-683.971, -390.865] - loss: 83.899 - mae: 119.154 - mean_q: 141.954 Interval 2264 (1131500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9909 3 episodes - episode_reward: -141.933 [-241.194, -38.989] - loss: 46.093 - mae: 117.909 - mean_q: 140.642 Interval 2265 (1132000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1494 2 episodes - episode_reward: -588.485 [-650.381, -526.589] - loss: 52.265 - mae: 115.741 - mean_q: 138.271 Interval 2266 
(1132500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7518 2 episodes - episode_reward: -105.459 [-127.875, -83.042] - loss: 63.179 - mae: 114.469 - mean_q: 136.718 Interval 2267 (1133000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1591 3 episodes - episode_reward: -246.809 [-259.277, -230.669] - loss: 54.183 - mae: 112.954 - mean_q: 135.283 Interval 2268 (1133500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7967 3 episodes - episode_reward: -301.318 [-504.237, -150.835] - loss: 57.055 - mae: 110.631 - mean_q: 133.250 Interval 2269 (1134000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2235 2 episodes - episode_reward: -264.205 [-396.694, -131.716] - loss: 66.059 - mae: 109.188 - mean_q: 129.778 Interval 2270 (1134500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3986 4 episodes - episode_reward: -204.446 [-260.569, -153.331] - loss: 55.335 - mae: 107.447 - mean_q: 126.802 Interval 2271 (1135000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3231 4 episodes - episode_reward: -223.764 [-327.595, -114.429] - loss: 59.753 - mae: 106.822 - mean_q: 127.124 Interval 2272 (1135500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1590 5 episodes - episode_reward: -348.097 [-529.674, -172.142] - loss: 58.443 - mae: 105.787 - mean_q: 124.554 Interval 2273 (1136000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2143 3 episodes - episode_reward: -349.479 [-455.890, -268.992] - loss: 60.941 - mae: 104.955 - mean_q: 121.785 Interval 2274 (1136500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6590 3 episodes - episode_reward: -145.998 [-185.431, -109.859] - loss: 62.030 - mae: 101.842 - mean_q: 116.328 Interval 2275 (1137000 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2579 2 episodes - episode_reward: -133.333 [-151.346, -115.320] - loss: 60.754 - mae: 102.269 - mean_q: 117.840 Interval 2276 (1137500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9112 5 episodes - episode_reward: -553.528 [-1966.698, -109.235] - loss: 56.061 - mae: 102.710 - mean_q: 118.323 Interval 2277 (1138000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4397 5 episodes - episode_reward: -253.189 [-577.507, -101.402] - loss: 58.598 - mae: 102.830 - mean_q: 115.712 Interval 2278 (1138500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.1069 3 episodes - episode_reward: -171.456 [-241.719, -135.942] - loss: 75.119 - mae: 102.763 - mean_q: 115.410 Interval 2279 (1139000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7630 4 episodes - episode_reward: -114.321 [-184.497, -73.440] - loss: 73.119 - mae: 101.951 - mean_q: 114.708 Interval 2280 (1139500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9514 4 episodes - episode_reward: -108.698 [-180.140, -67.806] - loss: 52.511 - mae: 100.402 - mean_q: 109.117 Interval 2281 (1140000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8957 3 episodes - episode_reward: -150.675 [-204.794, -106.911] - loss: 49.584 - mae: 100.102 - mean_q: 108.392 Interval 2282 (1140500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3812 2 episodes - episode_reward: -359.412 [-479.149, -239.675] - loss: 105.451 - mae: 100.677 - mean_q: 108.556 Interval 2283 (1141000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7449 3 episodes - episode_reward: -86.519 [-124.180, -64.243] - loss: 53.887 - mae: 100.427 - mean_q: 109.134 Interval 2284 (1141500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -1.5606 5 episodes - episode_reward: -176.456 [-427.213, -40.999] - loss: 61.431 - mae: 101.171 - mean_q: 108.141 Interval 2285 (1142000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5832 4 episodes - episode_reward: -196.514 [-369.463, -69.839] - loss: 74.257 - mae: 101.143 - mean_q: 107.151 Interval 2286 (1142500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7791 4 episodes - episode_reward: -216.010 [-284.101, -130.087] - loss: 67.329 - mae: 101.797 - mean_q: 105.573 Interval 2287 (1143000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9242 2 episodes - episode_reward: -237.980 [-300.474, -175.486] - loss: 70.539 - mae: 101.568 - mean_q: 105.523 Interval 2288 (1143500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3092 3 episodes - episode_reward: -222.611 [-335.244, -126.928] - loss: 75.233 - mae: 102.513 - mean_q: 106.951 Interval 2289 (1144000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5127 3 episodes - episode_reward: -264.470 [-455.456, -94.280] - loss: 77.895 - mae: 103.711 - mean_q: 105.412 Interval 2290 (1144500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3070 1 episodes - episode_reward: -107.460 [-107.460, -107.460] - loss: 72.595 - mae: 104.833 - mean_q: 107.425 Interval 2291 (1145000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5041 3 episodes - episode_reward: -246.841 [-384.397, -170.757] - loss: 60.795 - mae: 105.708 - mean_q: 107.470 Interval 2292 (1145500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0876 Interval 2293 (1146000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0468 Interval 2294 (1146500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.2608 1 episodes - episode_reward: -184.993 [-184.993, -184.993] - loss: 72.976 - mae: 111.013 - mean_q: 114.375 Interval 2295 (1147000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4723 Interval 2296 (1147500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9725 1 episodes - episode_reward: -618.796 [-618.796, -618.796] - loss: 67.511 - mae: 114.884 - mean_q: 121.296 Interval 2297 (1148000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4466 Interval 2298 (1148500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2321 3 episodes - episode_reward: -327.172 [-544.441, -138.204] - loss: 68.610 - mae: 119.215 - mean_q: 124.332 Interval 2299 (1149000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2699 1 episodes - episode_reward: -500.348 [-500.348, -500.348] - loss: 74.878 - mae: 120.218 - mean_q: 127.885 Interval 2300 (1149500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4709 1 episodes - episode_reward: -412.804 [-412.804, -412.804] - loss: 74.833 - mae: 123.423 - mean_q: 130.689 Interval 2301 (1150000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4175 1 episodes - episode_reward: -60.473 [-60.473, -60.473] - loss: 79.512 - mae: 126.212 - mean_q: 133.270 Interval 2302 (1150500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1677 Interval 2303 (1151000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.4832 Interval 2304 (1151500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -1.4288 3 episodes - episode_reward: -393.584 [-711.541, -100.000] - loss: 66.004 - mae: 130.623 - mean_q: 142.980 Interval 2305 (1152000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.3829 2 episodes - episode_reward: -80.080 [-111.072, -49.088] - loss: 72.273 - mae: 131.181 - mean_q: 142.073 Interval 2306 (1152500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1206 Interval 2307 (1153000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4728 Interval 2308 (1153500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5800 1 episodes - episode_reward: -601.563 [-601.563, -601.563] - loss: 92.232 - mae: 132.775 - mean_q: 146.206 Interval 2309 (1154000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7045 1 episodes - episode_reward: -138.428 [-138.428, -138.428] - loss: 81.812 - mae: 131.192 - mean_q: 144.074 Interval 2310 (1154500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1832 Interval 2311 (1155000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1447 Interval 2312 (1155500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.3043 Interval 2313 (1156000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.4345 Interval 2314 (1156500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.3373 Interval 2315 (1157000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0324 4 episodes - episode_reward: -436.171 [-986.039, -123.031] - loss: 68.608 - mae: 129.168 - mean_q: 142.802 Interval 2316 (1157500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0800 Interval 2317 (1158000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1952 3 episodes - episode_reward: -270.040 [-519.011, -130.609] - loss: 81.494 - mae: 128.022 - mean_q: 141.692 Interval 2318 (1158500 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: -0.4593 Interval 2319 (1159000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9143 1 episodes - episode_reward: -583.471 [-583.471, -583.471] - loss: 68.644 - mae: 124.904 - mean_q: 137.315 Interval 2320 (1159500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4055 1 episodes - episode_reward: -219.216 [-219.216, -219.216] - loss: 66.151 - mae: 124.503 - mean_q: 136.456 Interval 2321 (1160000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0297 2 episodes - episode_reward: -297.096 [-467.634, -126.557] - loss: 65.101 - mae: 124.912 - mean_q: 138.319 Interval 2322 (1160500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6524 Interval 2323 (1161000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7039 2 episodes - episode_reward: -313.169 [-593.228, -33.110] - loss: 62.847 - mae: 122.411 - mean_q: 136.147 Interval 2324 (1161500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2891 Interval 2325 (1162000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3914 Interval 2326 (1162500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1112 Interval 2327 (1163000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2314 Interval 2328 (1163500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.8728 3 episodes - episode_reward: -341.375 [-678.597, -128.605] - loss: 62.240 - mae: 121.084 - mean_q: 135.796 Interval 2329 (1164000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3294 1 episodes - episode_reward: -135.100 [-135.100, -135.100] - loss: 66.055 - mae: 121.132 - mean_q: 134.753 Interval 2330 (1164500 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: -1.4846 5 episodes - episode_reward: -149.640 [-233.724, -80.772] - loss: 69.665 - mae: 119.385 - mean_q: 133.327 Interval 2331 (1165000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2630 Interval 2332 (1165500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3397 Interval 2333 (1166000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0208 Interval 2334 (1166500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4555 4 episodes - episode_reward: -349.724 [-621.944, -120.884] - loss: 74.401 - mae: 114.893 - mean_q: 128.213 Interval 2335 (1167000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1888 Interval 2336 (1167500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4997 Interval 2337 (1168000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7632 1 episodes - episode_reward: -833.914 [-833.914, -833.914] - loss: 59.201 - mae: 114.899 - mean_q: 128.133 Interval 2338 (1168500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5020 1 episodes - episode_reward: -277.416 [-277.416, -277.416] - loss: 70.920 - mae: 113.847 - mean_q: 127.814 Interval 2339 (1169000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4573 Interval 2340 (1169500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.0529 Interval 2341 (1170000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2880 Interval 2342 (1170500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4999 2 episodes - episode_reward: -533.585 [-538.736, -528.435] - loss: 69.631 - mae: 114.640 - mean_q: 132.027 Interval 2343 (1171000 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5745 2 episodes - episode_reward: -433.232 [-656.205, -210.260] - loss: 66.556 - mae: 115.248 - mean_q: 133.707 Interval 2344 (1171500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6809 3 episodes - episode_reward: -272.780 [-406.784, -137.725] - loss: 61.849 - mae: 115.749 - mean_q: 133.894 Interval 2345 (1172000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7659 2 episodes - episode_reward: -144.342 [-191.508, -97.175] - loss: 63.810 - mae: 115.927 - mean_q: 134.117 Interval 2346 (1172500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2070 2 episodes - episode_reward: -324.995 [-376.555, -273.436] - loss: 60.434 - mae: 116.791 - mean_q: 136.486 Interval 2347 (1173000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3243 2 episodes - episode_reward: -261.344 [-389.360, -133.328] - loss: 69.241 - mae: 115.692 - mean_q: 133.128 Interval 2348 (1173500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6836 2 episodes - episode_reward: -256.583 [-298.709, -214.457] - loss: 57.517 - mae: 115.333 - mean_q: 131.484 Interval 2349 (1174000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6824 1 episodes - episode_reward: -195.373 [-195.373, -195.373] - loss: 52.437 - mae: 115.799 - mean_q: 134.426 Interval 2350 (1174500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1293 Interval 2351 (1175000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7544 2 episodes - episode_reward: -216.758 [-256.809, -176.706] - loss: 48.366 - mae: 115.121 - mean_q: 132.711 Interval 2352 (1175500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3864 Interval 2353 (1176000 steps performed) 
500/500 [==============================] - 3s 7ms/step - reward: -0.7462 1 episodes - episode_reward: -569.108 [-569.108, -569.108] - loss: 66.366 - mae: 113.694 - mean_q: 130.849 Interval 2354 (1176500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0246 Interval 2355 (1177000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2462 Interval 2356 (1177500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1650 Interval 2357 (1178000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1048 Interval 2358 (1178500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4966 2 episodes - episode_reward: -226.476 [-419.568, -33.384] - loss: 49.277 - mae: 110.908 - mean_q: 130.041 Interval 2359 (1179000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3438 Interval 2360 (1179500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9181 1 episodes - episode_reward: -1157.068 [-1157.068, -1157.068] - loss: 39.243 - mae: 110.381 - mean_q: 129.895 Interval 2361 (1180000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1920 Interval 2362 (1180500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2668 Interval 2363 (1181000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9166 3 episodes - episode_reward: -550.685 [-830.552, -405.415] - loss: 44.348 - mae: 108.340 - mean_q: 126.553 Interval 2364 (1181500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4890 1 episodes - episode_reward: -229.657 [-229.657, -229.657] - loss: 52.304 - mae: 107.182 - mean_q: 123.413 Interval 2365 (1182000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4434 1 episodes - episode_reward: 
-1193.723 [-1193.723, -1193.723] - loss: 48.103 - mae: 106.764 - mean_q: 122.671 Interval 2366 (1182500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4076 1 episodes - episode_reward: -273.499 [-273.499, -273.499] - loss: 46.311 - mae: 106.723 - mean_q: 123.841 Interval 2367 (1183000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3351 1 episodes - episode_reward: -268.622 [-268.622, -268.622] - loss: 43.158 - mae: 106.056 - mean_q: 122.211 Interval 2368 (1183500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1695 Interval 2369 (1184000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1579 Interval 2370 (1184500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2802 Interval 2371 (1185000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0267 2 episodes - episode_reward: -389.912 [-505.019, -274.805] - loss: 51.313 - mae: 111.221 - mean_q: 129.462 Interval 2372 (1185500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2750 Interval 2373 (1186000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0177 Interval 2374 (1186500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.5730 1 episodes - episode_reward: -447.542 [-447.542, -447.542] - loss: 47.138 - mae: 116.589 - mean_q: 138.143 Interval 2375 (1187000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.0493 1 episodes - episode_reward: -133.550 [-133.550, -133.550] - loss: 40.529 - mae: 119.743 - mean_q: 142.086 Interval 2376 (1187500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3463 3 episodes - episode_reward: -126.427 [-157.272, -88.253] - loss: 44.161 - mae: 121.606 - mean_q: 145.790 Interval 2377 (1188000 steps 
performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9770 4 episodes - episode_reward: -165.989 [-233.535, -105.527] - loss: 41.236 - mae: 121.686 - mean_q: 145.877 Interval 2378 (1188500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4470 Interval 2379 (1189000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2370 2 episodes - episode_reward: -157.612 [-181.892, -133.332] - loss: 48.702 - mae: 124.972 - mean_q: 151.784 Interval 2380 (1189500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1301 Interval 2381 (1190000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3818 1 episodes - episode_reward: -149.588 [-149.588, -149.588] - loss: 47.890 - mae: 123.305 - mean_q: 150.611 Interval 2382 (1190500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0842 2 episodes - episode_reward: -333.574 [-566.090, -101.057] - loss: 42.476 - mae: 122.310 - mean_q: 149.422 Interval 2383 (1191000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7853 3 episodes - episode_reward: -283.828 [-367.052, -182.672] - loss: 51.715 - mae: 121.113 - mean_q: 147.272 Interval 2384 (1191500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2380 1 episodes - episode_reward: -114.868 [-114.868, -114.868] - loss: 46.523 - mae: 120.421 - mean_q: 148.018 Interval 2385 (1192000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9960 5 episodes - episode_reward: -194.082 [-287.347, -80.047] - loss: 48.918 - mae: 118.951 - mean_q: 145.992 Interval 2386 (1192500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0861 Interval 2387 (1193000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2314 Interval 2388 (1193500 steps 
performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5248 Interval 2389 (1194000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1359 6 episodes - episode_reward: -244.444 [-536.811, -77.900] - loss: 45.162 - mae: 110.109 - mean_q: 137.871 Interval 2390 (1194500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9596 5 episodes - episode_reward: -200.889 [-368.894, -104.653] - loss: 38.577 - mae: 108.506 - mean_q: 136.637 Interval 2391 (1195000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6000 Interval 2392 (1195500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7598 1 episodes - episode_reward: -572.595 [-572.595, -572.595] - loss: 44.740 - mae: 104.377 - mean_q: 131.695 Interval 2393 (1196000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9701 1 episodes - episode_reward: -537.637 [-537.637, -537.637] - loss: 37.526 - mae: 101.791 - mean_q: 129.250 Interval 2394 (1196500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7851 1 episodes - episode_reward: -347.872 [-347.872, -347.872] - loss: 49.883 - mae: 100.504 - mean_q: 127.455 Interval 2395 (1197000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5765 3 episodes - episode_reward: -265.761 [-424.555, -144.293] - loss: 48.536 - mae: 98.499 - mean_q: 125.385 Interval 2396 (1197500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7904 3 episodes - episode_reward: -126.172 [-280.264, -16.161] - loss: 38.590 - mae: 97.330 - mean_q: 124.293 Interval 2397 (1198000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3339 6 episodes - episode_reward: -206.846 [-368.675, -116.886] - loss: 49.108 - mae: 95.713 - mean_q: 121.744 Interval 2398 (1198500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -1.6760 2 episodes - episode_reward: -356.667 [-376.762, -336.572] - loss: 49.288 - mae: 94.624 - mean_q: 120.697 Interval 2399 (1199000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0981 3 episodes - episode_reward: -199.576 [-314.250, -137.375] - loss: 49.818 - mae: 93.463 - mean_q: 118.019 Interval 2400 (1199500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8673 1 episodes - episode_reward: -562.711 [-562.711, -562.711] - loss: 39.373 - mae: 91.571 - mean_q: 115.390 Interval 2401 (1200000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0203 2 episodes - episode_reward: -416.731 [-575.870, -257.592] - loss: 50.232 - mae: 90.927 - mean_q: 113.787 Interval 2402 (1200500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7703 1 episodes - episode_reward: -377.845 [-377.845, -377.845] - loss: 33.848 - mae: 90.479 - mean_q: 113.121 Interval 2403 (1201000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7899 4 episodes - episode_reward: -367.542 [-419.740, -306.978] - loss: 44.316 - mae: 86.916 - mean_q: 107.209 Interval 2404 (1201500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5689 2 episodes - episode_reward: -656.760 [-700.888, -612.631] - loss: 38.444 - mae: 85.292 - mean_q: 104.201 Interval 2405 (1202000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0190 3 episodes - episode_reward: -452.604 [-637.430, -115.649] - loss: 43.466 - mae: 84.764 - mean_q: 102.011 Interval 2406 (1202500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1179 2 episodes - episode_reward: -328.081 [-413.406, -242.756] - loss: 50.328 - mae: 82.680 - mean_q: 99.045 Interval 2407 (1203000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -1.4178 2 episodes - episode_reward: -377.679 [-438.816, -316.542] - loss: 42.570 - mae: 81.140 - mean_q: 94.976 Interval 2408 (1203500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2259 2 episodes - episode_reward: -423.665 [-761.722, -85.609] - loss: 51.915 - mae: 78.807 - mean_q: 90.291 Interval 2409 (1204000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5980 3 episodes - episode_reward: -330.201 [-599.049, -159.136] - loss: 36.630 - mae: 77.586 - mean_q: 87.778 Interval 2410 (1204500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6380 4 episodes - episode_reward: -230.200 [-365.608, -141.020] - loss: 48.556 - mae: 75.221 - mean_q: 84.444 Interval 2411 (1205000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0847 4 episodes - episode_reward: -224.075 [-256.585, -173.153] - loss: 41.462 - mae: 72.839 - mean_q: 80.019 Interval 2412 (1205500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6692 4 episodes - episode_reward: -232.522 [-257.724, -194.995] - loss: 38.284 - mae: 71.363 - mean_q: 76.527 Interval 2413 (1206000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4574 4 episodes - episode_reward: -310.152 [-844.272, 3.591] - loss: 38.853 - mae: 69.116 - mean_q: 72.904 Interval 2414 (1206500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8755 4 episodes - episode_reward: -250.217 [-366.628, -131.047] - loss: 50.529 - mae: 68.194 - mean_q: 71.459 Interval 2415 (1207000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8645 2 episodes - episode_reward: -199.267 [-218.630, -179.903] - loss: 40.183 - mae: 69.845 - mean_q: 73.888 Interval 2416 (1207500 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: -2.2667 6 episodes - episode_reward: -177.386 [-337.493, -99.696] - loss: 45.564 - mae: 70.777 - mean_q: 74.193 Interval 2417 (1208000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0830 6 episodes - episode_reward: -180.799 [-234.453, -77.333] - loss: 44.926 - mae: 72.057 - mean_q: 76.223 Interval 2418 (1208500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2461 4 episodes - episode_reward: -173.136 [-201.491, -110.868] - loss: 41.803 - mae: 74.117 - mean_q: 77.824 Interval 2419 (1209000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0555 2 episodes - episode_reward: -242.782 [-282.612, -202.952] - loss: 48.407 - mae: 75.621 - mean_q: 80.741 Interval 2420 (1209500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0244 3 episodes - episode_reward: -154.227 [-231.932, -90.133] - loss: 42.424 - mae: 77.024 - mean_q: 82.742 Interval 2421 (1210000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0216 2 episodes - episode_reward: -222.248 [-227.297, -217.199] - loss: 49.779 - mae: 77.670 - mean_q: 83.999 Interval 2422 (1210500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0701 2 episodes - episode_reward: -268.122 [-370.319, -165.925] - loss: 44.059 - mae: 78.323 - mean_q: 84.092 Interval 2423 (1211000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0259 4 episodes - episode_reward: -260.805 [-460.541, -89.229] - loss: 46.747 - mae: 80.998 - mean_q: 86.622 Interval 2424 (1211500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8512 3 episodes - episode_reward: -188.555 [-290.910, -68.933] - loss: 46.377 - mae: 80.727 - mean_q: 86.430 Interval 2425 (1212000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8082 1 episodes - 
episode_reward: -213.031 [-213.031, -213.031] - loss: 48.602 - mae: 83.393 - mean_q: 90.723 Interval 2426 (1212500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0093 4 episodes - episode_reward: -160.861 [-243.822, -64.636] - loss: 51.217 - mae: 83.644 - mean_q: 89.628 Interval 2427 (1213000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0359 5 episodes - episode_reward: -192.303 [-327.158, -104.245] - loss: 60.602 - mae: 83.702 - mean_q: 87.970 Interval 2428 (1213500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3075 3 episodes - episode_reward: -193.785 [-334.743, -94.981] - loss: 55.212 - mae: 81.853 - mean_q: 83.527 Interval 2429 (1214000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8554 4 episodes - episode_reward: -130.112 [-227.954, -31.143] - loss: 50.500 - mae: 83.524 - mean_q: 86.012 Interval 2430 (1214500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9656 3 episodes - episode_reward: -170.573 [-197.353, -124.033] - loss: 57.893 - mae: 83.925 - mean_q: 84.708 Interval 2431 (1215000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1869 5 episodes - episode_reward: -119.461 [-187.021, -75.177] - loss: 52.614 - mae: 85.369 - mean_q: 88.207 Interval 2432 (1215500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3305 1 episodes - episode_reward: -63.007 [-63.007, -63.007] - loss: 58.636 - mae: 85.199 - mean_q: 86.692 Interval 2433 (1216000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0684 3 episodes - episode_reward: -208.422 [-405.386, -24.614] - loss: 57.796 - mae: 86.538 - mean_q: 88.140 Interval 2434 (1216500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7831 5 episodes - episode_reward: -179.339 [-221.057, -110.387] 
- loss: 65.693 - mae: 89.244 - mean_q: 93.309 Interval 2435 (1217000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5463 1 episodes - episode_reward: -287.546 [-287.546, -287.546] - loss: 58.560 - mae: 90.134 - mean_q: 94.040 Interval 2436 (1217500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2884 3 episodes - episode_reward: -230.122 [-305.411, -171.710] - loss: 60.601 - mae: 91.125 - mean_q: 94.533 Interval 2437 (1218000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3338 3 episodes - episode_reward: -179.154 [-213.102, -122.200] - loss: 64.161 - mae: 89.692 - mean_q: 92.933 Interval 2438 (1218500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1617 3 episodes - episode_reward: -178.500 [-251.232, -98.841] - loss: 58.841 - mae: 90.608 - mean_q: 93.333 Interval 2439 (1219000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9851 2 episodes - episode_reward: -264.033 [-314.314, -213.751] - loss: 58.782 - mae: 91.694 - mean_q: 94.835 Interval 2440 (1219500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8092 3 episodes - episode_reward: -182.563 [-257.081, -123.216] - loss: 66.122 - mae: 92.716 - mean_q: 97.122 Interval 2441 (1220000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5862 1 episodes - episode_reward: -264.469 [-264.469, -264.469] - loss: 64.054 - mae: 95.497 - mean_q: 99.732 Interval 2442 (1220500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0025 5 episodes - episode_reward: -199.848 [-365.184, -124.816] - loss: 73.058 - mae: 97.502 - mean_q: 101.413 Interval 2443 (1221000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9160 2 episodes - episode_reward: -144.156 [-190.943, -97.368] - loss: 79.115 - mae: 99.899 - mean_q: 
103.531 Interval 2444 (1221500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0600 2 episodes - episode_reward: -304.613 [-364.151, -245.075] - loss: 75.288 - mae: 103.018 - mean_q: 107.696 Interval 2445 (1222000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5330 1 episodes - episode_reward: -297.472 [-297.472, -297.472] - loss: 76.949 - mae: 103.197 - mean_q: 106.905 Interval 2446 (1222500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1564 Interval 2447 (1223000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5020 Interval 2448 (1223500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0594 2 episodes - episode_reward: -462.357 [-609.026, -315.688] - loss: 73.335 - mae: 103.867 - mean_q: 105.990 Interval 2449 (1224000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7969 1 episodes - episode_reward: -172.523 [-172.523, -172.523] - loss: 81.995 - mae: 102.751 - mean_q: 104.057 Interval 2450 (1224500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3613 2 episodes - episode_reward: -379.035 [-573.171, -184.899] - loss: 76.003 - mae: 101.933 - mean_q: 104.430 Interval 2451 (1225000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9334 1 episodes - episode_reward: -442.374 [-442.374, -442.374] - loss: 78.721 - mae: 101.650 - mean_q: 102.181 Interval 2452 (1225500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0675 1 episodes - episode_reward: -830.539 [-830.539, -830.539] - loss: 76.712 - mae: 101.955 - mean_q: 102.963 Interval 2453 (1226000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9711 4 episodes - episode_reward: -303.994 [-716.346, -70.828] - loss: 78.710 - mae: 101.804 - mean_q: 102.016 
Interval 2454 (1226500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7240 4 episodes - episode_reward: -282.866 [-342.666, -182.577] - loss: 97.834 - mae: 102.277 - mean_q: 104.076 Interval 2455 (1227000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3966 5 episodes - episode_reward: -299.980 [-562.184, -132.732] - loss: 96.248 - mae: 101.968 - mean_q: 103.863 Interval 2456 (1227500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0433 1 episodes - episode_reward: -403.640 [-403.640, -403.640] - loss: 90.406 - mae: 101.383 - mean_q: 103.554 Interval 2457 (1228000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6674 4 episodes - episode_reward: -342.969 [-483.397, -185.459] - loss: 107.057 - mae: 101.675 - mean_q: 102.874 Interval 2458 (1228500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5563 1 episodes - episode_reward: -369.162 [-369.162, -369.162] - loss: 111.114 - mae: 102.839 - mean_q: 106.068 Interval 2459 (1229000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1115 2 episodes - episode_reward: -288.838 [-322.388, -255.288] - loss: 123.403 - mae: 104.273 - mean_q: 108.865 Interval 2460 (1229500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3612 2 episodes - episode_reward: -259.905 [-394.043, -125.767] - loss: 133.024 - mae: 106.637 - mean_q: 115.731 Interval 2461 (1230000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6372 3 episodes - episode_reward: -289.360 [-479.182, -110.798] - loss: 148.272 - mae: 107.917 - mean_q: 119.472 Interval 2462 (1230500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8953 1 episodes - episode_reward: -555.436 [-555.436, -555.436] - loss: 163.480 - mae: 110.645 - mean_q: 124.413 Interval 2463 
(1231000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3153 Interval 2464 (1231500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0781 Interval 2465 (1232000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2128 Interval 2466 (1232500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1307 Interval 2467 (1233000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.5237 Interval 2468 (1233500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4830 2 episodes - episode_reward: -403.157 [-661.784, -144.530] - loss: 238.080 - mae: 156.268 - mean_q: 204.057 Interval 2469 (1234000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2355 Interval 2470 (1234500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1338 Interval 2471 (1235000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4224 Interval 2472 (1235500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 0.2054 Interval 2473 (1236000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.5953 Interval 2474 (1236500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2086 Interval 2475 (1237000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1334 Interval 2476 (1237500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: 0.1408 Interval 2477 (1238000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8471 1 episodes - episode_reward: -999.121 [-999.121, -999.121] - loss: 488.088 - mae: 324.233 - mean_q: 442.405 Interval 2478 (1238500 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -0.5305 1 episodes - episode_reward: -392.920 [-392.920, -392.920] - loss: 508.152 - mae: 337.299 - mean_q: 458.895 Interval 2479 (1239000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1023 1 episodes - episode_reward: -143.978 [-143.978, -143.978] - loss: 484.835 - mae: 353.602 - mean_q: 479.570 Interval 2480 (1239500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.1186 2 episodes - episode_reward: 45.798 [-44.935, 136.531] - loss: 562.527 - mae: 364.533 - mean_q: 492.751 Interval 2481 (1240000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0029 1 episodes - episode_reward: -301.439 [-301.439, -301.439] - loss: 512.762 - mae: 369.228 - mean_q: 498.090 Interval 2482 (1240500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0585 2 episodes - episode_reward: -337.879 [-373.879, -301.879] - loss: 481.034 - mae: 370.906 - mean_q: 500.814 Interval 2483 (1241000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4425 1 episodes - episode_reward: -360.696 [-360.696, -360.696] - loss: 550.250 - mae: 380.289 - mean_q: 514.218 Interval 2484 (1241500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0073 2 episodes - episode_reward: -484.231 [-569.671, -398.791] - loss: 509.751 - mae: 387.686 - mean_q: 525.121 Interval 2485 (1242000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8904 4 episodes - episode_reward: -362.272 [-653.747, -105.467] - loss: 614.416 - mae: 394.589 - mean_q: 535.794 Interval 2486 (1242500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2494 2 episodes - episode_reward: -598.305 [-657.758, -538.852] - loss: 555.811 - mae: 403.534 - mean_q: 549.221 Interval 2487 (1243000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8402 
3 episodes - episode_reward: -398.702 [-707.118, -108.245] - loss: 551.125 - mae: 411.969 - mean_q: 561.266 Interval 2488 (1243500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6276 2 episodes - episode_reward: -551.168 [-628.043, -474.293] - loss: 535.593 - mae: 424.146 - mean_q: 577.957 Interval 2489 (1244000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1750 3 episodes - episode_reward: -281.870 [-458.333, 1.228] - loss: 523.535 - mae: 436.891 - mean_q: 595.117 Interval 2490 (1244500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9390 2 episodes - episode_reward: -532.212 [-715.239, -349.185] - loss: 569.475 - mae: 452.247 - mean_q: 613.942 Interval 2491 (1245000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4616 Interval 2492 (1245500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0622 Interval 2493 (1246000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1457 Interval 2494 (1246500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2613 Interval 2495 (1247000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7445 1 episodes - episode_reward: -621.101 [-621.101, -621.101] - loss: 578.918 - mae: 520.913 - mean_q: 703.000 Interval 2496 (1247500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0870 2 episodes - episode_reward: -269.867 [-297.661, -242.073] - loss: 614.477 - mae: 535.166 - mean_q: 721.486 Interval 2497 (1248000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8146 2 episodes - episode_reward: -224.087 [-228.972, -219.202] - loss: 620.230 - mae: 541.371 - mean_q: 729.733 Interval 2498 (1248500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9477 3 
episodes - episode_reward: -208.366 [-289.806, -100.000] - loss: 636.741 - mae: 558.575 - mean_q: 754.306 Interval 2499 (1249000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5573 1 episodes - episode_reward: -229.272 [-229.272, -229.272] - loss: 629.146 - mae: 577.985 - mean_q: 781.039 Interval 2500 (1249500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7465 3 episodes - episode_reward: -303.297 [-324.750, -263.956] - loss: 572.956 - mae: 602.685 - mean_q: 815.467 Interval 2501 (1250000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9496 3 episodes - episode_reward: -320.090 [-412.003, -229.507] - loss: 628.085 - mae: 617.276 - mean_q: 837.116 Interval 2502 (1250500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1142 2 episodes - episode_reward: -288.037 [-393.850, -182.225] - loss: 785.995 - mae: 643.601 - mean_q: 871.525 Interval 2503 (1251000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5524 4 episodes - episode_reward: -173.103 [-245.072, -101.050] - loss: 778.850 - mae: 665.288 - mean_q: 901.485 Interval 2504 (1251500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8095 1 episodes - episode_reward: -333.534 [-333.534, -333.534] - loss: 915.970 - mae: 683.350 - mean_q: 925.041 Interval 2505 (1252000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9853 4 episodes - episode_reward: -281.439 [-346.906, -180.831] - loss: 897.456 - mae: 703.563 - mean_q: 951.830 Interval 2506 (1252500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3461 2 episodes - episode_reward: -299.731 [-361.570, -237.891] - loss: 730.667 - mae: 717.162 - mean_q: 971.144 Interval 2507 (1253000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2595 4 episodes - 
episode_reward: -292.765 [-381.751, -171.314] - loss: 856.898 - mae: 728.528 - mean_q: 984.235 Interval 2508 (1253500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7588 6 episodes - episode_reward: -235.033 [-397.610, -91.849] - loss: 726.322 - mae: 723.723 - mean_q: 976.390 Interval 2509 (1254000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0410 1 episodes - episode_reward: -321.461 [-321.461, -321.461] - loss: 920.509 - mae: 732.503 - mean_q: 988.424 Interval 2510 (1254500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9519 2 episodes - episode_reward: -241.818 [-348.998, -134.637] - loss: 759.947 - mae: 726.698 - mean_q: 980.142 Interval 2511 (1255000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6997 3 episodes - episode_reward: -275.608 [-350.932, -169.077] - loss: 861.217 - mae: 729.815 - mean_q: 982.701 Interval 2512 (1255500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9001 1 episodes - episode_reward: -315.749 [-315.749, -315.749] - loss: 698.415 - mae: 723.153 - mean_q: 973.269 Interval 2513 (1256000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6114 2 episodes - episode_reward: -429.665 [-479.453, -379.877] - loss: 656.652 - mae: 719.819 - mean_q: 966.364 Interval 2514 (1256500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3899 3 episodes - episode_reward: -246.468 [-481.518, -124.005] - loss: 686.711 - mae: 718.057 - mean_q: 964.060 Interval 2515 (1257000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7287 4 episodes - episode_reward: -227.725 [-387.567, -79.015] - loss: 1015.454 - mae: 711.262 - mean_q: 954.742 Interval 2516 (1257500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5012 3 episodes - 
episode_reward: -385.294 [-487.328, -227.019] - loss: 824.840 - mae: 713.462 - mean_q: 958.239 Interval 2517 (1258000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3443 3 episodes - episode_reward: -298.233 [-425.927, -86.188] - loss: 692.369 - mae: 708.787 - mean_q: 953.554 Interval 2518 (1258500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1087 1 episodes - episode_reward: -626.974 [-626.974, -626.974] - loss: 913.161 - mae: 721.901 - mean_q: 971.449 Interval 2519 (1259000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1079 3 episodes - episode_reward: -359.438 [-432.466, -270.064] - loss: 1163.164 - mae: 723.083 - mean_q: 974.073 Interval 2520 (1259500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2649 3 episodes - episode_reward: -350.263 [-397.255, -299.740] - loss: 985.139 - mae: 737.337 - mean_q: 993.761 Interval 2521 (1260000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4201 1 episodes - episode_reward: -406.715 [-406.715, -406.715] - loss: 991.475 - mae: 746.079 - mean_q: 1009.555 Interval 2522 (1260500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8221 3 episodes - episode_reward: -425.812 [-716.109, -101.729] - loss: 1212.628 - mae: 771.565 - mean_q: 1045.984 Interval 2523 (1261000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4177 1 episodes - episode_reward: -745.562 [-745.562, -745.562] - loss: 1522.382 - mae: 800.255 - mean_q: 1084.828 Interval 2524 (1261500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5336 Interval 2525 (1262000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5212 1 episodes - episode_reward: -838.104 [-838.104, -838.104] - loss: 1720.134 - mae: 859.159 - mean_q: 1168.428 Interval 2526 
(1262500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1942 2 episodes - episode_reward: -396.119 [-442.158, -350.081] - loss: 1719.193 - mae: 891.845 - mean_q: 1213.741 Interval 2527 (1263000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2846 Interval 2528 (1263500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4744 Interval 2529 (1264000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5986 1 episodes - episode_reward: -2018.596 [-2018.596, -2018.596] - loss: 2022.365 - mae: 963.983 - mean_q: 1310.298 Interval 2530 (1264500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9058 Interval 2531 (1265000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6921 3 episodes - episode_reward: -783.731 [-1457.657, -304.461] - loss: 2524.681 - mae: 1003.362 - mean_q: 1363.032 Interval 2532 (1265500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0274 1 episodes - episode_reward: -248.509 [-248.509, -248.509] - loss: 1945.193 - mae: 1015.988 - mean_q: 1381.587 Interval 2533 (1266000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1393 2 episodes - episode_reward: -709.887 [-1315.376, -104.399] - loss: 2051.931 - mae: 1020.945 - mean_q: 1385.216 Interval 2534 (1266500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8859 Interval 2535 (1267000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2100 4 episodes - episode_reward: -721.913 [-2040.253, -100.000] - loss: 1988.339 - mae: 1044.821 - mean_q: 1418.189 Interval 2536 (1267500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3227 Interval 2537 (1268000 steps performed) 500/500 [==============================] - 3s 6ms/step - 
reward: -3.9816 3 episodes - episode_reward: -1246.210 [-2768.679, -315.909] - loss: 1497.988 - mae: 1034.284 - mean_q: 1402.567 Interval 2538 (1268500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.3660 5 episodes - episode_reward: -529.099 [-897.016, -266.056] - loss: 2324.805 - mae: 1040.368 - mean_q: 1408.825 Interval 2539 (1269000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6745 4 episodes - episode_reward: -470.952 [-1066.063, -100.000] - loss: 3227.945 - mae: 1035.462 - mean_q: 1400.324 Interval 2540 (1269500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5534 3 episodes - episode_reward: -607.489 [-1045.239, -289.465] - loss: 2638.183 - mae: 1017.958 - mean_q: 1374.847 Interval 2541 (1270000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2136 1 episodes - episode_reward: -768.078 [-768.078, -768.078] - loss: 2264.875 - mae: 1009.775 - mean_q: 1361.772 Interval 2542 (1270500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.3277 3 episodes - episode_reward: -916.195 [-1445.124, -221.876] - loss: 1494.569 - mae: 1005.753 - mean_q: 1355.257 Interval 2543 (1271000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6195 5 episodes - episode_reward: -398.662 [-541.924, -252.297] - loss: 2769.611 - mae: 986.065 - mean_q: 1324.909 Interval 2544 (1271500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9677 2 episodes - episode_reward: -350.081 [-387.208, -312.953] - loss: 1392.377 - mae: 963.121 - mean_q: 1293.551 Interval 2545 (1272000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6780 2 episodes - episode_reward: -513.708 [-635.917, -391.498] - loss: 1371.823 - mae: 942.476 - mean_q: 1263.253 Interval 2546 (1272500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.4221 2 episodes - episode_reward: -593.983 [-972.472, -215.494] - loss: 1399.259 - mae: 922.616 - mean_q: 1234.973 Interval 2547 (1273000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2367 1 episodes - episode_reward: -1774.472 [-1774.472, -1774.472] - loss: 1388.495 - mae: 896.289 - mean_q: 1199.490 Interval 2548 (1273500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2893 Interval 2549 (1274000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4055 Interval 2550 (1274500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8622 1 episodes - episode_reward: -2719.138 [-2719.138, -2719.138] - loss: 1296.926 - mae: 849.631 - mean_q: 1131.958 Interval 2551 (1275000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8881 1 episodes - episode_reward: -1534.685 [-1534.685, -1534.685] - loss: 1317.870 - mae: 844.232 - mean_q: 1124.027 Interval 2552 (1275500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6909 3 episodes - episode_reward: -439.435 [-641.508, -206.184] - loss: 1427.729 - mae: 827.038 - mean_q: 1100.993 Interval 2553 (1276000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2395 2 episodes - episode_reward: -1264.499 [-2440.302, -88.696] - loss: 1215.064 - mae: 831.239 - mean_q: 1107.781 Interval 2554 (1276500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.7860 2 episodes - episode_reward: -1014.479 [-1885.846, -143.112] - loss: 1478.307 - mae: 834.919 - mean_q: 1112.649 Interval 2555 (1277000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4237 3 episodes - episode_reward: -402.729 [-501.502, -286.035] - loss: 1614.386 - mae: 834.621 - mean_q: 1114.144 Interval 2556 (1277500 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2320 4 episodes - episode_reward: -236.587 [-348.721, -101.576] - loss: 1224.285 - mae: 824.425 - mean_q: 1103.485 Interval 2557 (1278000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.9641 1 episodes - episode_reward: -2131.766 [-2131.766, -2131.766] - loss: 1709.566 - mae: 822.083 - mean_q: 1098.266 Interval 2558 (1278500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7601 4 episodes - episode_reward: -286.895 [-645.041, -145.937] - loss: 1404.289 - mae: 820.521 - mean_q: 1095.677 Interval 2559 (1279000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5093 2 episodes - episode_reward: -284.945 [-563.235, -6.655] - loss: 1505.316 - mae: 806.680 - mean_q: 1078.307 Interval 2560 (1279500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7218 Interval 2561 (1280000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4227 5 episodes - episode_reward: -503.066 [-1410.829, -91.879] - loss: 1241.299 - mae: 800.771 - mean_q: 1070.149 Interval 2562 (1280500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3519 Interval 2563 (1281000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7039 1 episodes - episode_reward: -1308.752 [-1308.752, -1308.752] - loss: 1442.047 - mae: 770.569 - mean_q: 1031.481 Interval 2564 (1281500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8973 Interval 2565 (1282000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1637 1 episodes - episode_reward: -1465.687 [-1465.687, -1465.687] - loss: 1312.916 - mae: 746.771 - mean_q: 1000.605 Interval 2566 (1282500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -6.6978 1 
episodes - episode_reward: -4005.905 [-4005.905, -4005.905] - loss: 1079.479 - mae: 729.848 - mean_q: 974.931 Interval 2567 (1283000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2246 Interval 2568 (1283500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.8874 2 episodes - episode_reward: -2064.411 [-3836.982, -291.839] - loss: 734.188 - mae: 691.645 - mean_q: 920.138 Interval 2569 (1284000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4186 1 episodes - episode_reward: -114.622 [-114.622, -114.622] - loss: 1058.715 - mae: 675.359 - mean_q: 896.566 Interval 2570 (1284500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9325 2 episodes - episode_reward: -282.458 [-533.528, -31.389] - loss: 841.356 - mae: 666.311 - mean_q: 883.218 Interval 2571 (1285000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5453 1 episodes - episode_reward: -33.130 [-33.130, -33.130] - loss: 850.212 - mae: 649.607 - mean_q: 859.197 Interval 2572 (1285500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1229 Interval 2573 (1286000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5246 2 episodes - episode_reward: -1059.798 [-1759.241, -360.356] - loss: 841.802 - mae: 615.480 - mean_q: 812.196 Interval 2574 (1286500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.5419 6 episodes - episode_reward: -126.485 [-210.802, -54.468] - loss: 752.733 - mae: 601.834 - mean_q: 793.367 Interval 2575 (1287000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.8268 6 episodes - episode_reward: -150.199 [-284.989, -92.361] - loss: 848.109 - mae: 593.220 - mean_q: 782.541 Interval 2576 (1287500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3650 2 
episodes - episode_reward: -98.023 [-174.692, -21.354] - loss: 886.928 - mae: 581.157 - mean_q: 764.593 Interval 2577 (1288000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7916 3 episodes - episode_reward: -132.302 [-359.203, -16.128] - loss: 723.986 - mae: 568.906 - mean_q: 746.979 Interval 2578 (1288500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5201 5 episodes - episode_reward: -154.678 [-290.853, -36.810] - loss: 715.569 - mae: 555.572 - mean_q: 730.474 Interval 2579 (1289000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8931 1 episodes - episode_reward: -467.554 [-467.554, -467.554] - loss: 716.859 - mae: 548.107 - mean_q: 721.634 Interval 2580 (1289500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1812 1 episodes - episode_reward: -110.170 [-110.170, -110.170] - loss: 776.170 - mae: 544.172 - mean_q: 714.295 Interval 2581 (1290000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3927 2 episodes - episode_reward: 11.365 [-102.760, 125.490] - loss: 844.090 - mae: 547.533 - mean_q: 721.000 Interval 2582 (1290500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7943 3 episodes - episode_reward: -516.403 [-737.791, -343.417] - loss: 800.592 - mae: 545.942 - mean_q: 717.842 Interval 2583 (1291000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0132 1 episodes - episode_reward: -80.235 [-80.235, -80.235] - loss: 742.783 - mae: 541.963 - mean_q: 713.416 Interval 2584 (1291500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0909 Interval 2585 (1292000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -1.3266 Interval 2586 (1292500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6548 1 episodes - 
episode_reward: -974.084 [-974.084, -974.084] - loss: 737.028 - mae: 521.054 - mean_q: 684.188 Interval 2587 (1293000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9954 Interval 2588 (1293500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1913 1 episodes - episode_reward: -1063.165 [-1063.165, -1063.165] - loss: 675.750 - mae: 492.211 - mean_q: 644.814 Interval 2589 (1294000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4321 1 episodes - episode_reward: -256.743 [-256.743, -256.743] - loss: 644.551 - mae: 486.102 - mean_q: 637.543 Interval 2590 (1294500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4646 2 episodes - episode_reward: -545.469 [-978.563, -112.374] - loss: 567.577 - mae: 468.443 - mean_q: 612.729 Interval 2591 (1295000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2084 1 episodes - episode_reward: -614.421 [-614.421, -614.421] - loss: 623.435 - mae: 458.976 - mean_q: 598.591 Interval 2592 (1295500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.9587 1 episodes - episode_reward: -987.252 [-987.252, -987.252] - loss: 721.830 - mae: 441.585 - mean_q: 574.548 Interval 2593 (1296000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1375 1 episodes - episode_reward: -488.325 [-488.325, -488.325] - loss: 536.434 - mae: 430.778 - mean_q: 559.292 Interval 2594 (1296500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.3283 1 episodes - episode_reward: -727.580 [-727.580, -727.580] - loss: 581.838 - mae: 416.156 - mean_q: 539.855 Interval 2595 (1297000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0868 Interval 2596 (1297500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3683 1 episodes - 
episode_reward: -1206.226 [-1206.226, -1206.226] - loss: 578.643 - mae: 396.285 - mean_q: 511.520 Interval 2597 (1298000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4507 Interval 2598 (1298500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0885 Interval 2599 (1299000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2231 Interval 2600 (1299500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1626 Interval 2601 (1300000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7633 1 episodes - episode_reward: -861.911 [-861.911, -861.911] - loss: 660.619 - mae: 375.484 - mean_q: 481.615 Interval 2602 (1300500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2685 Interval 2603 (1301000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2032 Interval 2604 (1301500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2654 Interval 2605 (1302000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2877 Interval 2606 (1302500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.4064 1 episodes - episode_reward: -832.400 [-832.400, -832.400] - loss: 626.363 - mae: 382.861 - mean_q: 492.328 Interval 2607 (1303000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.6997 Interval 2608 (1303500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.5923 1 episodes - episode_reward: -527.343 [-527.343, -527.343] - loss: 577.671 - mae: 378.056 - mean_q: 484.963 Interval 2609 (1304000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2106 Interval 2610 (1304500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: 
-0.2013 Interval 2611 (1305000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2689 Interval 2612 (1305500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.3074 Interval 2613 (1306000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0442 2 episodes - episode_reward: -390.908 [-681.816, -100.000] - loss: 517.160 - mae: 362.035 - mean_q: 465.801 Interval 2614 (1306500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4111 1 episodes - episode_reward: -481.565 [-481.565, -481.565] - loss: 486.490 - mae: 355.949 - mean_q: 457.609 Interval 2615 (1307000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3448 Interval 2616 (1307500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6178 1 episodes - episode_reward: -441.550 [-441.550, -441.550] - loss: 395.480 - mae: 342.675 - mean_q: 439.725 Interval 2617 (1308000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3402 Interval 2618 (1308500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.0628 Interval 2619 (1309000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1217 Interval 2620 (1309500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2993 Interval 2621 (1310000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2634 Interval 2622 (1310500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.7608 2 episodes - episode_reward: -532.601 [-902.418, -162.783] - loss: 422.912 - mae: 310.543 - mean_q: 399.409 Interval 2623 (1311000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0802 Interval 2624 (1311500 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: -1.0633 1 episodes - episode_reward: -303.734 [-303.734, -303.734] - loss: 389.671 - mae: 300.023 - mean_q: 385.078 Interval 2625 (1312000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.0072 Interval 2626 (1312500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0085 Interval 2627 (1313000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0700 2 episodes - episode_reward: -319.411 [-516.453, -122.368] - loss: 349.836 - mae: 284.255 - mean_q: 361.817 Interval 2628 (1313500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3140 1 episodes - episode_reward: -253.699 [-253.699, -253.699] - loss: 295.469 - mae: 279.788 - mean_q: 357.987 Interval 2629 (1314000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8231 Interval 2630 (1314500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5356 2 episodes - episode_reward: -388.558 [-677.117, -100.000] - loss: 283.408 - mae: 270.549 - mean_q: 344.187 Interval 2631 (1315000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7702 4 episodes - episode_reward: -304.688 [-586.038, -133.568] - loss: 312.207 - mae: 266.633 - mean_q: 337.565 Interval 2632 (1315500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0987 2 episodes - episode_reward: -273.539 [-454.973, -92.105] - loss: 322.040 - mae: 261.407 - mean_q: 332.003 Interval 2633 (1316000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0236 1 episodes - episode_reward: -578.889 [-578.889, -578.889] - loss: 345.706 - mae: 258.795 - mean_q: 327.170 Interval 2634 (1316500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1624 3 episodes - episode_reward: -357.368 [-425.724, -224.514] - loss: 248.613 - mae: 253.241 
- mean_q: 319.208 Interval 2635 (1317000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0366 1 episodes - episode_reward: -480.391 [-480.391, -480.391] - loss: 255.730 - mae: 250.659 - mean_q: 316.363 Interval 2636 (1317500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2001 4 episodes - episode_reward: -289.119 [-482.044, -119.407] - loss: 260.585 - mae: 248.211 - mean_q: 311.028 Interval 2637 (1318000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1148 Interval 2638 (1318500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8279 1 episodes - episode_reward: -521.475 [-521.475, -521.475] - loss: 320.028 - mae: 242.991 - mean_q: 304.423 Interval 2639 (1319000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4614 2 episodes - episode_reward: -316.491 [-539.988, -92.994] - loss: 250.719 - mae: 238.721 - mean_q: 296.735 Interval 2640 (1319500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2212 7 episodes - episode_reward: -379.768 [-727.423, -176.105] - loss: 241.900 - mae: 237.045 - mean_q: 292.249 Interval 2641 (1320000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5555 1 episodes - episode_reward: -238.354 [-238.354, -238.354] - loss: 222.521 - mae: 232.998 - mean_q: 286.588 Interval 2642 (1320500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0094 1 episodes - episode_reward: -578.003 [-578.003, -578.003] - loss: 250.024 - mae: 232.885 - mean_q: 287.838 Interval 2643 (1321000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1063 Interval 2644 (1321500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2225 Interval 2645 (1322000 steps performed) 500/500 [==============================] - 4s 8ms/step - 
reward: -0.2872 Interval 2646 (1322500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1760 Interval 2647 (1323000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1455 Interval 2648 (1323500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4536 2 episodes - episode_reward: -534.025 [-767.934, -300.116] - loss: 185.600 - mae: 215.191 - mean_q: 266.951 Interval 2649 (1324000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2307 1 episodes - episode_reward: -366.574 [-366.574, -366.574] - loss: 196.173 - mae: 214.997 - mean_q: 264.727 Interval 2650 (1324500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1790 3 episodes - episode_reward: -489.248 [-659.189, -309.285] - loss: 193.732 - mae: 214.561 - mean_q: 265.267 Interval 2651 (1325000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2483 2 episodes - episode_reward: -318.767 [-454.716, -182.818] - loss: 141.276 - mae: 213.410 - mean_q: 262.174 Interval 2652 (1325500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8365 3 episodes - episode_reward: -475.149 [-589.277, -282.041] - loss: 175.418 - mae: 215.165 - mean_q: 266.429 Interval 2653 (1326000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1034 1 episodes - episode_reward: -484.848 [-484.848, -484.848] - loss: 195.141 - mae: 213.259 - mean_q: 263.003 Interval 2654 (1326500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3565 3 episodes - episode_reward: -224.224 [-316.791, -123.783] - loss: 214.124 - mae: 216.724 - mean_q: 266.672 Interval 2655 (1327000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8475 3 episodes - episode_reward: -281.880 [-427.705, -174.442] - loss: 227.571 - mae: 222.575 - 
mean_q: 276.222 Interval 2656 (1327500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0121 3 episodes - episode_reward: -210.317 [-261.343, -126.871] - loss: 207.401 - mae: 228.268 - mean_q: 283.995 Interval 2657 (1328000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3194 5 episodes - episode_reward: -288.548 [-658.520, -112.874] - loss: 177.193 - mae: 231.685 - mean_q: 288.293 Interval 2658 (1328500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6266 2 episodes - episode_reward: -666.251 [-1060.037, -272.465] - loss: 196.847 - mae: 242.948 - mean_q: 303.675 Interval 2659 (1329000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4424 2 episodes - episode_reward: -448.404 [-462.026, -434.783] - loss: 359.410 - mae: 249.603 - mean_q: 313.692 Interval 2660 (1329500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9818 4 episodes - episode_reward: -264.226 [-374.967, -154.257] - loss: 240.959 - mae: 256.315 - mean_q: 320.237 Interval 2661 (1330000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7443 4 episodes - episode_reward: -171.465 [-235.842, -127.164] - loss: 329.530 - mae: 269.622 - mean_q: 339.042 Interval 2662 (1330500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8327 5 episodes - episode_reward: -212.117 [-389.041, -94.444] - loss: 276.907 - mae: 271.547 - mean_q: 340.059 Interval 2663 (1331000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2046 3 episodes - episode_reward: -367.977 [-496.492, -275.851] - loss: 390.137 - mae: 276.131 - mean_q: 346.787 Interval 2664 (1331500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7673 2 episodes - episode_reward: -149.604 [-247.421, -51.787] - loss: 431.319 - mae: 290.305 - mean_q: 
365.823 Interval 2665 (1332000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1314 4 episodes - episode_reward: -261.159 [-299.095, -205.844] - loss: 237.021 - mae: 297.246 - mean_q: 375.754 Interval 2666 (1332500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8450 2 episodes - episode_reward: -191.910 [-251.199, -132.620] - loss: 305.728 - mae: 301.372 - mean_q: 380.360 Interval 2667 (1333000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4200 5 episodes - episode_reward: -370.334 [-557.759, -132.559] - loss: 377.274 - mae: 304.167 - mean_q: 385.505 Interval 2668 (1333500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8730 2 episodes - episode_reward: -672.175 [-735.679, -608.670] - loss: 267.871 - mae: 312.620 - mean_q: 398.930 Interval 2669 (1334000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4920 3 episodes - episode_reward: -253.074 [-289.025, -202.789] - loss: 346.444 - mae: 314.281 - mean_q: 399.962 Interval 2670 (1334500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9512 3 episodes - episode_reward: -356.373 [-545.735, -158.846] - loss: 310.106 - mae: 320.944 - mean_q: 409.171 Interval 2671 (1335000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3999 5 episodes - episode_reward: -437.019 [-890.563, -127.189] - loss: 323.312 - mae: 323.757 - mean_q: 412.245 Interval 2672 (1335500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5283 4 episodes - episode_reward: -298.214 [-588.972, -117.208] - loss: 251.103 - mae: 326.833 - mean_q: 416.412 Interval 2673 (1336000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9045 1 episodes - episode_reward: -412.400 [-412.400, -412.400] - loss: 249.514 - mae: 335.372 - mean_q: 429.322 
Interval 2674 (1336500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6833 2 episodes - episode_reward: -389.900 [-498.592, -281.209] - loss: 369.667 - mae: 338.776 - mean_q: 435.049 Interval 2675 (1337000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8721 3 episodes - episode_reward: -160.102 [-282.541, -85.848] - loss: 346.217 - mae: 350.476 - mean_q: 451.317 Interval 2676 (1337500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5452 2 episodes - episode_reward: -447.623 [-615.098, -280.147] - loss: 276.632 - mae: 353.999 - mean_q: 460.099 Interval 2677 (1338000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0978 2 episodes - episode_reward: -209.772 [-319.544, -100.000] - loss: 453.122 - mae: 358.263 - mean_q: 465.061 Interval 2678 (1338500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2610 Interval 2679 (1339000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3087 2 episodes - episode_reward: -458.753 [-663.041, -254.464] - loss: 336.504 - mae: 385.746 - mean_q: 503.030 Interval 2680 (1339500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4239 1 episodes - episode_reward: -189.202 [-189.202, -189.202] - loss: 409.397 - mae: 408.635 - mean_q: 536.029 Interval 2681 (1340000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9499 3 episodes - episode_reward: -164.374 [-255.812, -104.917] - loss: 405.168 - mae: 425.732 - mean_q: 560.759 Interval 2682 (1340500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1556 1 episodes - episode_reward: -511.871 [-511.871, -511.871] - loss: 490.922 - mae: 446.860 - mean_q: 590.107 Interval 2683 (1341000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7944 1 
episodes - episode_reward: -289.604 [-289.604, -289.604] - loss: 523.752 - mae: 462.122 - mean_q: 612.800 Interval 2684 (1341500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0631 1 episodes - episode_reward: -461.863 [-461.863, -461.863] - loss: 577.665 - mae: 490.561 - mean_q: 652.849 Interval 2685 (1342000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5913 1 episodes - episode_reward: -495.466 [-495.466, -495.466] - loss: 592.502 - mae: 511.598 - mean_q: 681.455 Interval 2686 (1342500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7926 1 episodes - episode_reward: -353.950 [-353.950, -353.950] - loss: 598.938 - mae: 534.238 - mean_q: 715.401 Interval 2687 (1343000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8057 1 episodes - episode_reward: -453.359 [-453.359, -453.359] - loss: 817.680 - mae: 554.608 - mean_q: 743.262 Interval 2688 (1343500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9275 1 episodes - episode_reward: -494.689 [-494.689, -494.689] - loss: 620.227 - mae: 565.212 - mean_q: 757.502 Interval 2689 (1344000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7210 3 episodes - episode_reward: -561.775 [-1210.422, -183.952] - loss: 802.372 - mae: 582.739 - mean_q: 782.527 Interval 2690 (1344500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6581 3 episodes - episode_reward: -644.236 [-1552.465, -105.432] - loss: 859.420 - mae: 594.600 - mean_q: 797.175 Interval 2691 (1345000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2924 3 episodes - episode_reward: -409.727 [-702.491, -201.018] - loss: 820.928 - mae: 602.194 - mean_q: 807.238 Interval 2692 (1345500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8170 1 episodes 
- episode_reward: -520.298 [-520.298, -520.298] - loss: 791.925 - mae: 624.798 - mean_q: 840.185 Interval 2693 (1346000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7455 1 episodes - episode_reward: -1232.923 [-1232.923, -1232.923] - loss: 976.267 - mae: 644.080 - mean_q: 865.301 Interval 2694 (1346500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0881 1 episodes - episode_reward: -929.649 [-929.649, -929.649] - loss: 1133.877 - mae: 653.000 - mean_q: 877.048 Interval 2695 (1347000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1432 1 episodes - episode_reward: -1264.093 [-1264.093, -1264.093] - loss: 1326.830 - mae: 669.035 - mean_q: 898.419 Interval 2696 (1347500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.1953 2 episodes - episode_reward: -1053.763 [-2013.127, -94.398] - loss: 1205.484 - mae: 682.346 - mean_q: 918.132 Interval 2697 (1348000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4914 2 episodes - episode_reward: -729.519 [-1337.047, -121.992] - loss: 1291.653 - mae: 683.551 - mean_q: 917.438 Interval 2698 (1348500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2072 1 episodes - episode_reward: -1243.580 [-1243.580, -1243.580] - loss: 925.953 - mae: 689.231 - mean_q: 925.354 Interval 2699 (1349000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5564 Interval 2700 (1349500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.9083 1 episodes - episode_reward: -1897.458 [-1897.458, -1897.458] - loss: 1075.897 - mae: 711.342 - mean_q: 953.959 Interval 2701 (1350000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -3.4616 3 episodes - episode_reward: -692.402 [-1826.584, -100.000] - loss: 1181.673 - mae: 711.939 - mean_q: 954.948 
Interval 2702 (1350500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5662 Interval 2703 (1351000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.0991 1 episodes - episode_reward: -1319.003 [-1319.003, -1319.003] - loss: 878.186 - mae: 728.579 - mean_q: 977.761 Interval 2704 (1351500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -9.4522 2 episodes - episode_reward: -2595.916 [-5071.981, -119.851] - loss: 909.644 - mae: 729.256 - mean_q: 973.873 Interval 2705 (1352000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2022 Interval 2706 (1352500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.8375 2 episodes - episode_reward: -1150.575 [-2201.150, -100.000] - loss: 1168.861 - mae: 761.112 - mean_q: 1006.672 Interval 2707 (1353000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.2926 Interval 2708 (1353500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0254 2 episodes - episode_reward: -478.277 [-930.394, -26.160] - loss: 1396.067 - mae: 787.793 - mean_q: 1047.214 Interval 2709 (1354000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8750 Interval 2710 (1354500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7412 1 episodes - episode_reward: -1098.950 [-1098.950, -1098.950] - loss: 1180.885 - mae: 825.413 - mean_q: 1099.921 Interval 2711 (1355000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7527 Interval 2712 (1355500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0857 Interval 2713 (1356000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8225 1 episodes - episode_reward: -743.534 [-743.534, -743.534] - loss: 1502.828 - mae: 880.261 
- mean_q: 1170.269 Interval 2714 (1356500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6243 Interval 2715 (1357000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3450 1 episodes - episode_reward: -715.153 [-715.153, -715.153] - loss: 2003.400 - mae: 915.330 - mean_q: 1215.698 Interval 2716 (1357500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9991 1 episodes - episode_reward: -162.628 [-162.628, -162.628] - loss: 1757.037 - mae: 922.334 - mean_q: 1224.127 Interval 2717 (1358000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0016 Interval 2718 (1358500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4188 Interval 2719 (1359000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0986 Interval 2720 (1359500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2023 Interval 2721 (1360000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9670 3 episodes - episode_reward: -376.848 [-879.630, -37.141] - loss: 1821.394 - mae: 990.508 - mean_q: 1308.915 Interval 2722 (1360500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1206 2 episodes - episode_reward: -221.267 [-249.017, -193.517] - loss: 2073.807 - mae: 1014.117 - mean_q: 1340.121 Interval 2723 (1361000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8357 1 episodes - episode_reward: -595.135 [-595.135, -595.135] - loss: 1976.880 - mae: 1031.397 - mean_q: 1368.209 Interval 2724 (1361500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2794 4 episodes - episode_reward: -167.797 [-264.860, -14.883] - loss: 2822.313 - mae: 1043.649 - mean_q: 1380.991 Interval 2725 (1362000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -1.5166 3 episodes - episode_reward: -184.264 [-287.746, -81.421] - loss: 2638.015 - mae: 1057.315 - mean_q: 1402.806 Interval 2726 (1362500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0635 6 episodes - episode_reward: -117.383 [-292.169, -35.971] - loss: 3133.986 - mae: 1077.173 - mean_q: 1428.578 Interval 2727 (1363000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8415 5 episodes - episode_reward: -151.997 [-190.615, -111.774] - loss: 2751.081 - mae: 1087.058 - mean_q: 1443.844 Interval 2728 (1363500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9678 3 episodes - episode_reward: -212.040 [-293.995, -102.337] - loss: 3266.131 - mae: 1109.787 - mean_q: 1473.095 Interval 2729 (1364000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9082 2 episodes - episode_reward: -222.828 [-244.524, -201.132] - loss: 3608.346 - mae: 1131.266 - mean_q: 1503.515 Interval 2730 (1364500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3917 3 episodes - episode_reward: -231.187 [-414.030, -134.631] - loss: 3214.323 - mae: 1137.061 - mean_q: 1510.461 Interval 2731 (1365000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0938 2 episodes - episode_reward: -231.689 [-365.608, -97.769] - loss: 3648.830 - mae: 1124.872 - mean_q: 1492.171 Interval 2732 (1365500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8520 3 episodes - episode_reward: -330.962 [-372.083, -266.946] - loss: 3169.133 - mae: 1155.812 - mean_q: 1536.678 Interval 2733 (1366000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2971 3 episodes - episode_reward: -380.021 [-531.241, -117.664] - loss: 2971.100 - mae: 1136.653 - mean_q: 1509.662 Interval 2734 (1366500 steps 
performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4751 3 episodes - episode_reward: -435.360 [-502.979, -337.075] - loss: 3621.186 - mae: 1121.074 - mean_q: 1490.470 Interval 2735 (1367000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1079 4 episodes - episode_reward: -269.156 [-611.965, -113.104] - loss: 2421.623 - mae: 1124.767 - mean_q: 1495.865 Interval 2736 (1367500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5670 2 episodes - episode_reward: -288.313 [-507.496, -69.129] - loss: 2733.842 - mae: 1124.246 - mean_q: 1495.798 Interval 2737 (1368000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5761 5 episodes - episode_reward: -234.998 [-331.764, -186.359] - loss: 2713.833 - mae: 1115.153 - mean_q: 1481.865 Interval 2738 (1368500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.9521 5 episodes - episode_reward: -439.776 [-616.381, -93.397] - loss: 2625.232 - mae: 1105.859 - mean_q: 1468.129 Interval 2739 (1369000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0827 4 episodes - episode_reward: -368.415 [-429.425, -294.803] - loss: 2392.732 - mae: 1095.275 - mean_q: 1456.919 Interval 2740 (1369500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0829 4 episodes - episode_reward: -397.229 [-541.982, -135.905] - loss: 2101.640 - mae: 1095.980 - mean_q: 1457.191 Interval 2741 (1370000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9313 3 episodes - episode_reward: -484.283 [-511.415, -458.168] - loss: 2825.982 - mae: 1079.305 - mean_q: 1433.093 Interval 2742 (1370500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1096 5 episodes - episode_reward: -324.847 [-513.470, -100.000] - loss: 2288.594 - mae: 1069.329 - mean_q: 1419.529 Interval 2743 
(1371000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5192 1 episodes - episode_reward: -627.205 [-627.205, -627.205] - loss: 3633.926 - mae: 1052.835 - mean_q: 1396.724 Interval 2744 (1371500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8658 3 episodes - episode_reward: -318.281 [-546.976, -100.000] - loss: 2186.979 - mae: 1057.102 - mean_q: 1401.414 Interval 2745 (1372000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8250 Interval 2746 (1372500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2341 1 episodes - episode_reward: -1093.899 [-1093.899, -1093.899] - loss: 2254.677 - mae: 1071.624 - mean_q: 1420.004 Interval 2747 (1373000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1886 Interval 2748 (1373500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1000 1 episodes - episode_reward: -265.559 [-265.559, -265.559] - loss: 3289.263 - mae: 1069.950 - mean_q: 1417.478 Interval 2749 (1374000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2707 1 episodes - episode_reward: -1333.969 [-1333.969, -1333.969] - loss: 1851.545 - mae: 1072.633 - mean_q: 1420.867 Interval 2750 (1374500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9321 Interval 2751 (1375000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8080 2 episodes - episode_reward: -712.596 [-1269.402, -155.790] - loss: 2447.442 - mae: 1066.655 - mean_q: 1413.061 Interval 2752 (1375500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5211 Interval 2753 (1376000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2492 Interval 2754 (1376500 steps performed) 500/500 [==============================] - 4s 8ms/step - 
reward: -1.1463 Interval 2755 (1377000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1085 4 episodes - episode_reward: -456.532 [-1354.191, -100.000] - loss: 3050.894 - mae: 1078.164 - mean_q: 1429.040 Interval 2756 (1377500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9571 1 episodes - episode_reward: -709.628 [-709.628, -709.628] - loss: 2112.035 - mae: 1068.576 - mean_q: 1418.408 Interval 2757 (1378000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.5889 Interval 2758 (1378500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0819 1 episodes - episode_reward: -933.068 [-933.068, -933.068] - loss: 2358.191 - mae: 1095.207 - mean_q: 1459.984 Interval 2759 (1379000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7317 Interval 2760 (1379500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.3314 Interval 2761 (1380000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9382 1 episodes - episode_reward: -794.315 [-794.315, -794.315] - loss: 2439.868 - mae: 1107.052 - mean_q: 1478.574 Interval 2762 (1380500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3965 Interval 2763 (1381000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4389 2 episodes - episode_reward: -375.225 [-550.344, -200.105] - loss: 2676.430 - mae: 1119.067 - mean_q: 1494.140 Interval 2764 (1381500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.8674 6 episodes - episode_reward: -354.376 [-572.472, -114.992] - loss: 2423.904 - mae: 1111.904 - mean_q: 1485.428 Interval 2765 (1382000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2506 1 episodes - episode_reward: -498.930 [-498.930, -498.930] - loss: 
2650.820 - mae: 1118.922 - mean_q: 1494.642 Interval 2766 (1382500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9939 1 episodes - episode_reward: -463.983 [-463.983, -463.983] - loss: 2430.353 - mae: 1117.073 - mean_q: 1491.240 Interval 2767 (1383000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6070 2 episodes - episode_reward: -548.606 [-587.636, -509.575] - loss: 2675.520 - mae: 1119.161 - mean_q: 1493.721 Interval 2768 (1383500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7159 1 episodes - episode_reward: -463.483 [-463.483, -463.483] - loss: 2251.472 - mae: 1123.103 - mean_q: 1498.374 Interval 2769 (1384000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7329 2 episodes - episode_reward: -334.345 [-592.660, -76.029] - loss: 2029.321 - mae: 1124.444 - mean_q: 1500.050 Interval 2770 (1384500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5531 3 episodes - episode_reward: -646.884 [-894.773, -343.870] - loss: 3916.615 - mae: 1138.596 - mean_q: 1519.736 Interval 2771 (1385000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6836 1 episodes - episode_reward: -606.480 [-606.480, -606.480] - loss: 2154.826 - mae: 1151.174 - mean_q: 1540.635 Interval 2772 (1385500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0704 2 episodes - episode_reward: -246.255 [-523.576, 31.065] - loss: 2828.990 - mae: 1169.604 - mean_q: 1567.966 Interval 2773 (1386000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1797 Interval 2774 (1386500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2447 Interval 2775 (1387000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1581 Interval 2776 (1387500 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -0.0601 Interval 2777 (1388000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1888 Interval 2778 (1388500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2307 Interval 2779 (1389000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 0.0965 Interval 2780 (1389500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.1506 Interval 2781 (1390000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.3977 Interval 2782 (1390500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0173 Interval 2783 (1391000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.6829 Interval 2784 (1391500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2114 Interval 2785 (1392000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.0872 Interval 2786 (1392500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2031 Interval 2787 (1393000 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1972 Interval 2788 (1393500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.3948 Interval 2789 (1394000 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: 0.1504 Interval 2790 (1394500 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.3245 Interval 2791 (1395000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.2251 Interval 2792 (1395500 steps performed) 500/500 [==============================] - 10s 19ms/step - reward: -0.1247 Interval 2793 (1396000 steps performed) 500/500 [==============================] - 10s 19ms/step - reward: -0.2518 
Interval 2794 (1396500 steps performed) 500/500 [==============================] - 9s 17ms/step - reward: -1.0584 1 episodes - episode_reward: -2587.970 [-2587.970, -2587.970] - loss: 4349.488 - mae: 1562.712 - mean_q: 2116.498 Interval 2795 (1397000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6990 Interval 2796 (1397500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.0087 Interval 2797 (1398000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0109 2 episodes - episode_reward: -664.333 [-1005.524, -323.142] - loss: 4568.687 - mae: 1636.543 - mean_q: 2210.674 Interval 2798 (1398500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.1624 3 episodes - episode_reward: -1081.490 [-1605.054, -41.897] - loss: 4113.914 - mae: 1656.211 - mean_q: 2236.186 Interval 2799 (1399000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9324 1 episodes - episode_reward: -1491.296 [-1491.296, -1491.296] - loss: 4466.825 - mae: 1674.397 - mean_q: 2259.432 Interval 2800 (1399500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8181 1 episodes - episode_reward: -1351.329 [-1351.329, -1351.329] - loss: 4137.754 - mae: 1674.694 - mean_q: 2255.624 Interval 2801 (1400000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.1225 2 episodes - episode_reward: -929.367 [-1391.104, -467.630] - loss: 4519.567 - mae: 1668.496 - mean_q: 2248.877 Interval 2802 (1400500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0331 4 episodes - episode_reward: -548.050 [-783.681, -94.140] - loss: 4103.547 - mae: 1685.688 - mean_q: 2270.673 Interval 2803 (1401000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6845 6 episodes - episode_reward: -317.279 [-836.260, -146.093] - loss: 3919.322 - 
mae: 1682.423 - mean_q: 2266.840 Interval 2804 (1401500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3383 3 episodes - episode_reward: -374.094 [-493.720, -255.621] - loss: 4445.471 - mae: 1667.775 - mean_q: 2245.547 Interval 2805 (1402000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4360 4 episodes - episode_reward: -307.383 [-425.489, -114.232] - loss: 4228.290 - mae: 1669.704 - mean_q: 2248.655 Interval 2806 (1402500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8783 3 episodes - episode_reward: -300.569 [-536.394, -155.676] - loss: 3804.146 - mae: 1680.296 - mean_q: 2264.390 Interval 2807 (1403000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.1398 3 episodes - episode_reward: -893.302 [-1445.594, -567.192] - loss: 5272.388 - mae: 1678.136 - mean_q: 2263.757 Interval 2808 (1403500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.9132 2 episodes - episode_reward: -879.143 [-1258.333, -499.953] - loss: 3744.254 - mae: 1716.588 - mean_q: 2323.397 Interval 2809 (1404000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.8449 4 episodes - episode_reward: -998.362 [-1469.485, -498.744] - loss: 4306.799 - mae: 1714.087 - mean_q: 2319.700 Interval 2810 (1404500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.5321 5 episodes - episode_reward: -570.420 [-1651.719, -112.826] - loss: 4501.037 - mae: 1783.669 - mean_q: 2414.829 Interval 2811 (1405000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8165 1 episodes - episode_reward: -214.081 [-214.081, -214.081] - loss: 5753.311 - mae: 1807.024 - mean_q: 2444.801 Interval 2812 (1405500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0339 3 episodes - episode_reward: -647.913 [-953.667, 
-268.061] - loss: 5344.950 - mae: 1825.841 - mean_q: 2471.820 Interval 2813 (1406000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5580 3 episodes - episode_reward: -489.069 [-666.322, -169.436] - loss: 5947.341 - mae: 1866.806 - mean_q: 2520.800 Interval 2814 (1406500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.7456 5 episodes - episode_reward: -467.219 [-779.257, -117.958] - loss: 6519.416 - mae: 1841.519 - mean_q: 2482.775 Interval 2815 (1407000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.7809 4 episodes - episode_reward: -454.695 [-585.720, -100.000] - loss: 5503.705 - mae: 1848.139 - mean_q: 2491.213 Interval 2816 (1407500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.6493 2 episodes - episode_reward: -970.159 [-1071.489, -868.828] - loss: 7055.372 - mae: 1851.852 - mean_q: 2493.167 Interval 2817 (1408000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6766 2 episodes - episode_reward: -462.568 [-604.002, -321.134] - loss: 6297.081 - mae: 1840.973 - mean_q: 2473.568 Interval 2818 (1408500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7545 1 episodes - episode_reward: -1105.799 [-1105.799, -1105.799] - loss: 6586.708 - mae: 1803.997 - mean_q: 2419.769 Interval 2819 (1409000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0558 1 episodes - episode_reward: -874.353 [-874.353, -874.353] - loss: 5945.962 - mae: 1829.900 - mean_q: 2458.299 Interval 2820 (1409500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3537 1 episodes - episode_reward: -1186.809 [-1186.809, -1186.809] - loss: 8009.004 - mae: 1791.411 - mean_q: 2405.843 Interval 2821 (1410000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8593 1 episodes - 
episode_reward: -1220.131 [-1220.131, -1220.131] - loss: 6281.511 - mae: 1792.296 - mean_q: 2410.298 Interval 2822 (1410500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7160 Interval 2823 (1411000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4219 1 episodes - episode_reward: -2337.779 [-2337.779, -2337.779] - loss: 7068.137 - mae: 1760.804 - mean_q: 2372.822 Interval 2824 (1411500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2222 1 episodes - episode_reward: -1441.263 [-1441.263, -1441.263] - loss: 6392.989 - mae: 1743.231 - mean_q: 2350.213 Interval 2825 (1412000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1558 Interval 2826 (1412500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9914 1 episodes - episode_reward: -1940.052 [-1940.052, -1940.052] - loss: 5368.284 - mae: 1756.056 - mean_q: 2368.104 Interval 2827 (1413000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.6054 Interval 2828 (1413500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6223 1 episodes - episode_reward: -2002.217 [-2002.217, -2002.217] - loss: 5181.979 - mae: 1756.723 - mean_q: 2369.217 Interval 2829 (1414000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8181 1 episodes - episode_reward: -2095.936 [-2095.936, -2095.936] - loss: 9183.938 - mae: 1739.341 - mean_q: 2343.378 Interval 2830 (1414500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0433 Interval 2831 (1415000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5104 1 episodes - episode_reward: -1805.845 [-1805.845, -1805.845] - loss: 5570.193 - mae: 1719.046 - mean_q: 2318.459 Interval 2832 (1415500 steps performed) 500/500 [==============================] - 
3s 6ms/step - reward: -4.2025 1 episodes - episode_reward: -2875.467 [-2875.467, -2875.467] - loss: 6084.876 - mae: 1700.276 - mean_q: 2291.388 Interval 2833 (1416000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.3321 Interval 2834 (1416500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2193 1 episodes - episode_reward: -3103.430 [-3103.430, -3103.430] - loss: 7033.963 - mae: 1683.539 - mean_q: 2263.482 Interval 2835 (1417000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -13.0164 Interval 2836 (1417500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0915 2 episodes - episode_reward: -4235.541 [-8284.450, -186.631] - loss: 6915.569 - mae: 1610.893 - mean_q: 2157.118 Interval 2837 (1418000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.6435 3 episodes - episode_reward: -1250.938 [-1926.315, -116.370] - loss: 4751.219 - mae: 1599.136 - mean_q: 2144.220 Interval 2838 (1418500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0478 1 episodes - episode_reward: -1219.394 [-1219.394, -1219.394] - loss: 5198.136 - mae: 1564.281 - mean_q: 2094.498 Interval 2839 (1419000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3974 1 episodes - episode_reward: -610.232 [-610.232, -610.232] - loss: 5498.284 - mae: 1564.552 - mean_q: 2092.073 Interval 2840 (1419500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1072 2 episodes - episode_reward: -775.919 [-1451.839, -100.000] - loss: 5838.890 - mae: 1559.452 - mean_q: 2081.664 Interval 2841 (1420000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5889 1 episodes - episode_reward: -1304.764 [-1304.764, -1304.764] - loss: 5485.067 - mae: 1544.167 - mean_q: 2062.564 Interval 2842 (1420500 steps performed) 
500/500 [==============================] - 3s 6ms/step - reward: -1.7074 1 episodes - episode_reward: -1849.240 [-1849.240, -1849.240] - loss: 5041.945 - mae: 1548.739 - mean_q: 2063.242 Interval 2843 (1421000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0061 2 episodes - episode_reward: -742.562 [-1340.300, -144.823] - loss: 5130.111 - mae: 1522.955 - mean_q: 2025.102 Interval 2844 (1421500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8239 2 episodes - episode_reward: -542.827 [-833.407, -252.248] - loss: 5141.600 - mae: 1495.012 - mean_q: 1983.035 Interval 2845 (1422000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3520 Interval 2846 (1422500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8966 3 episodes - episode_reward: -806.110 [-1761.135, -269.129] - loss: 4932.679 - mae: 1452.280 - mean_q: 1926.107 Interval 2847 (1423000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8405 1 episodes - episode_reward: -294.827 [-294.827, -294.827] - loss: 5191.923 - mae: 1443.830 - mean_q: 1909.179 Interval 2848 (1423500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7568 1 episodes - episode_reward: -957.711 [-957.711, -957.711] - loss: 5263.294 - mae: 1402.389 - mean_q: 1850.793 Interval 2849 (1424000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0513 3 episodes - episode_reward: -531.030 [-903.359, -218.259] - loss: 5427.954 - mae: 1385.704 - mean_q: 1828.726 Interval 2850 (1424500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3698 1 episodes - episode_reward: -145.620 [-145.620, -145.620] - loss: 5071.533 - mae: 1372.472 - mean_q: 1809.188 Interval 2851 (1425000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2545 1 episodes - 
episode_reward: -535.087 [-535.087, -535.087] - loss: 4842.904 - mae: 1378.565 - mean_q: 1821.123 Interval 2852 (1425500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7306 4 episodes - episode_reward: -331.676 [-359.411, -291.970] - loss: 4536.392 - mae: 1350.525 - mean_q: 1785.390 Interval 2853 (1426000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4868 1 episodes - episode_reward: -448.918 [-448.918, -448.918] - loss: 6049.103 - mae: 1369.323 - mean_q: 1811.085 Interval 2854 (1426500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6848 1 episodes - episode_reward: -921.350 [-921.350, -921.350] - loss: 5575.578 - mae: 1376.284 - mean_q: 1824.173 Interval 2855 (1427000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8705 2 episodes - episode_reward: -840.185 [-959.679, -720.691] - loss: 5811.374 - mae: 1407.126 - mean_q: 1868.322 Interval 2856 (1427500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6674 1 episodes - episode_reward: -463.079 [-463.079, -463.079] - loss: 7123.670 - mae: 1437.817 - mean_q: 1912.479 Interval 2857 (1428000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.1408 4 episodes - episode_reward: -900.069 [-2333.818, -100.000] - loss: 7411.620 - mae: 1468.406 - mean_q: 1955.767 Interval 2858 (1428500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.1234 2 episodes - episode_reward: -993.439 [-1706.495, -280.382] - loss: 9322.308 - mae: 1528.423 - mean_q: 2035.082 Interval 2859 (1429000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2826 4 episodes - episode_reward: -415.971 [-618.058, -98.476] - loss: 16720.152 - mae: 1576.452 - mean_q: 2099.669 Interval 2860 (1429500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 
-2.0664 4 episodes - episode_reward: -242.425 [-433.994, -156.346] - loss: 11819.376 - mae: 1614.138 - mean_q: 2150.698 Interval 2861 (1430000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2492 5 episodes - episode_reward: -235.312 [-464.417, -87.582] - loss: 11195.359 - mae: 1630.433 - mean_q: 2172.257 Interval 2862 (1430500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5838 2 episodes - episode_reward: -461.897 [-603.211, -320.583] - loss: 11143.054 - mae: 1634.546 - mean_q: 2170.165 Interval 2863 (1431000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -6.9726 1 episodes - episode_reward: -3586.860 [-3586.860, -3586.860] - loss: 10729.522 - mae: 1617.099 - mean_q: 2144.706 Interval 2864 (1431500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9539 3 episodes - episode_reward: -396.752 [-725.979, -150.297] - loss: 29783.623 - mae: 1640.345 - mean_q: 2176.800 Interval 2865 (1432000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1251 2 episodes - episode_reward: -292.852 [-591.784, 6.079] - loss: 10720.370 - mae: 1611.093 - mean_q: 2140.305 Interval 2866 (1432500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6775 1 episodes - episode_reward: -360.943 [-360.943, -360.943] - loss: 22489.980 - mae: 1620.263 - mean_q: 2150.622 Interval 2867 (1433000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2221 1 episodes - episode_reward: -1441.442 [-1441.442, -1441.442] - loss: 10846.491 - mae: 1699.810 - mean_q: 2272.485 Interval 2868 (1433500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.3119 1 episodes - episode_reward: -1581.527 [-1581.527, -1581.527] - loss: 11971.498 - mae: 1749.753 - mean_q: 2340.674 Interval 2869 (1434000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -0.9806 1 episodes - episode_reward: -420.030 [-420.030, -420.030] - loss: 22290.893 - mae: 1829.736 - mean_q: 2445.533 Interval 2870 (1434500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5454 2 episodes - episode_reward: -375.819 [-605.373, -146.264] - loss: 12061.993 - mae: 1912.864 - mean_q: 2564.679 Interval 2871 (1435000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3045 2 episodes - episode_reward: -458.616 [-594.764, -322.469] - loss: 17668.248 - mae: 1999.099 - mean_q: 2684.382 Interval 2872 (1435500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1639 Interval 2873 (1436000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.7729 1 episodes - episode_reward: -849.675 [-849.675, -849.675] - loss: 16121.092 - mae: 2157.913 - mean_q: 2897.292 Interval 2874 (1436500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2428 Interval 2875 (1437000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9690 2 episodes - episode_reward: -790.925 [-1421.677, -160.173] - loss: 18731.846 - mae: 2292.894 - mean_q: 3076.302 Interval 2876 (1437500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6857 1 episodes - episode_reward: -1602.638 [-1602.638, -1602.638] - loss: 17499.521 - mae: 2355.372 - mean_q: 3156.249 Interval 2877 (1438000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5497 Interval 2878 (1438500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8729 1 episodes - episode_reward: -968.405 [-968.405, -968.405] - loss: 19146.449 - mae: 2483.627 - mean_q: 3326.792 Interval 2879 (1439000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8159 1 episodes - 
episode_reward: -1047.376 [-1047.376, -1047.376] - loss: 13667.865 - mae: 2476.080 - mean_q: 3313.096 Interval 2880 (1439500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9280 2 episodes - episode_reward: -471.698 [-843.395, -100.000] - loss: 14249.914 - mae: 2526.354 - mean_q: 3376.878 Interval 2881 (1440000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7333 1 episodes - episode_reward: -857.560 [-857.560, -857.560] - loss: 13606.039 - mae: 2546.739 - mean_q: 3404.528 Interval 2882 (1440500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7542 4 episodes - episode_reward: -467.492 [-788.104, -200.551] - loss: 14150.060 - mae: 2592.021 - mean_q: 3464.252 Interval 2883 (1441000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0467 2 episodes - episode_reward: -541.000 [-685.083, -396.917] - loss: 14173.941 - mae: 2626.152 - mean_q: 3518.024 Interval 2884 (1441500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4199 2 episodes - episode_reward: -650.961 [-868.915, -433.007] - loss: 13464.192 - mae: 2658.492 - mean_q: 3557.568 Interval 2885 (1442000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.5967 1 episodes - episode_reward: -1109.372 [-1109.372, -1109.372] - loss: 13742.591 - mae: 2696.352 - mean_q: 3613.522 Interval 2886 (1442500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9570 3 episodes - episode_reward: -409.778 [-821.580, -201.882] - loss: 14608.925 - mae: 2698.703 - mean_q: 3613.534 Interval 2887 (1443000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4038 3 episodes - episode_reward: -860.682 [-1046.588, -749.438] - loss: 13453.632 - mae: 2691.883 - mean_q: 3611.756 Interval 2888 (1443500 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: -3.2435 2 episodes - episode_reward: -658.027 [-707.734, -608.319] - loss: 15133.646 - mae: 2696.139 - mean_q: 3621.331 Interval 2889 (1444000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1710 3 episodes - episode_reward: -556.668 [-793.390, -146.048] - loss: 12343.810 - mae: 2680.927 - mean_q: 3603.075 Interval 2890 (1444500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2396 4 episodes - episode_reward: -521.001 [-863.414, -193.818] - loss: 14927.087 - mae: 2744.374 - mean_q: 3691.008 Interval 2891 (1445000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0132 3 episodes - episode_reward: -752.579 [-1235.086, -381.818] - loss: 14157.429 - mae: 2700.744 - mean_q: 3633.345 Interval 2892 (1445500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3657 3 episodes - episode_reward: -555.707 [-823.133, -149.837] - loss: 12656.996 - mae: 2702.762 - mean_q: 3640.600 Interval 2893 (1446000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9147 1 episodes - episode_reward: -765.399 [-765.399, -765.399] - loss: 12443.136 - mae: 2665.242 - mean_q: 3590.899 Interval 2894 (1446500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8197 2 episodes - episode_reward: -690.534 [-703.781, -677.288] - loss: 14340.837 - mae: 2673.597 - mean_q: 3601.741 Interval 2895 (1447000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4526 5 episodes - episode_reward: -299.858 [-470.559, -100.000] - loss: 15576.627 - mae: 2657.826 - mean_q: 3581.131 Interval 2896 (1447500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.7149 4 episodes - episode_reward: -569.003 [-1121.915, -120.342] - loss: 17553.418 - mae: 2647.196 - mean_q: 3568.937 Interval 2897 (1448000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -3.6270 3 episodes - episode_reward: -611.509 [-930.776, -184.219] - loss: 21547.766 - mae: 2557.301 - mean_q: 3449.897 Interval 2898 (1448500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -7.7554 3 episodes - episode_reward: -1258.194 [-2190.528, -759.190] - loss: 12127.860 - mae: 2521.471 - mean_q: 3406.553 Interval 2899 (1449000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.4421 1 episodes - episode_reward: -2048.497 [-2048.497, -2048.497] - loss: 12670.291 - mae: 2449.625 - mean_q: 3312.948 Interval 2900 (1449500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2382 1 episodes - episode_reward: -888.684 [-888.684, -888.684] - loss: 19069.014 - mae: 2393.184 - mean_q: 3234.092 Interval 2901 (1450000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4818 1 episodes - episode_reward: -1029.260 [-1029.260, -1029.260] - loss: 12531.587 - mae: 2386.958 - mean_q: 3235.461 Interval 2902 (1450500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.6102 3 episodes - episode_reward: -1129.838 [-2250.739, -85.899] - loss: 12991.750 - mae: 2372.060 - mean_q: 3215.232 Interval 2903 (1451000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8058 Interval 2904 (1451500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2658 Interval 2905 (1452000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4866 1 episodes - episode_reward: -908.486 [-908.486, -908.486] - loss: 12986.441 - mae: 2313.329 - mean_q: 3134.941 Interval 2906 (1452500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7542 1 episodes - episode_reward: -1652.146 [-1652.146, -1652.146] - loss: 12446.959 - mae: 2299.853 - mean_q: 3112.520 Interval 
2907 (1453000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.1423 Interval 2908 (1453500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 1.7871 1 episodes - episode_reward: -1194.936 [-1194.936, -1194.936] - loss: 10382.801 - mae: 2200.483 - mean_q: 2979.412 Interval 2909 (1454000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4653 2 episodes - episode_reward: -645.439 [-678.079, -612.800] - loss: 12069.117 - mae: 2164.218 - mean_q: 2928.314 Interval 2910 (1454500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9477 1 episodes - episode_reward: -1317.834 [-1317.834, -1317.834] - loss: 9320.226 - mae: 2129.977 - mean_q: 2878.843 Interval 2911 (1455000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.2427 Interval 2912 (1455500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.6469 1 episodes - episode_reward: -2621.780 [-2621.780, -2621.780] - loss: 10196.388 - mae: 2063.218 - mean_q: 2788.449 Interval 2913 (1456000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.8870 1 episodes - episode_reward: -2342.070 [-2342.070, -2342.070] - loss: 9440.737 - mae: 2006.759 - mean_q: 2713.665 Interval 2914 (1456500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.1429 1 episodes - episode_reward: -3228.658 [-3228.658, -3228.658] - loss: 9523.119 - mae: 1951.815 - mean_q: 2634.936 Interval 2915 (1457000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -5.8237 1 episodes - episode_reward: -3290.536 [-3290.536, -3290.536] - loss: 8552.101 - mae: 1918.867 - mean_q: 2591.376 Interval 2916 (1457500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1514 1 episodes - episode_reward: -1081.710 [-1081.710, -1081.710] - loss: 6905.477 - 
mae: 1864.602 - mean_q: 2515.565 Interval 2917 (1458000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.2752 1 episodes - episode_reward: -3145.957 [-3145.957, -3145.957] - loss: 8359.825 - mae: 1813.573 - mean_q: 2447.760 Interval 2918 (1458500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.8743 2 episodes - episode_reward: -2118.625 [-2855.011, -1382.239] - loss: 6881.102 - mae: 1768.589 - mean_q: 2386.827 Interval 2919 (1459000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.6033 1 episodes - episode_reward: -469.985 [-469.985, -469.985] - loss: 6827.574 - mae: 1718.797 - mean_q: 2317.484 Interval 2920 (1459500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.6617 2 episodes - episode_reward: -2368.394 [-4240.241, -496.547] - loss: 6295.867 - mae: 1679.058 - mean_q: 2264.669 Interval 2921 (1460000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.6996 1 episodes - episode_reward: -3313.872 [-3313.872, -3313.872] - loss: 7798.351 - mae: 1659.476 - mean_q: 2240.405 Interval 2922 (1460500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -8.5680 1 episodes - episode_reward: -3439.733 [-3439.733, -3439.733] - loss: 7764.808 - mae: 1627.433 - mean_q: 2197.389 Interval 2923 (1461000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -7.2307 1 episodes - episode_reward: -4747.884 [-4747.884, -4747.884] - loss: 6684.608 - mae: 1600.076 - mean_q: 2159.322 Interval 2924 (1461500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0257 1 episodes - episode_reward: -1916.163 [-1916.163, -1916.163] - loss: 7651.222 - mae: 1567.908 - mean_q: 2113.936 Interval 2925 (1462000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4154 2 episodes - episode_reward: -777.664 
[-1354.918, -200.409] - loss: 6610.354 - mae: 1570.998 - mean_q: 2119.990 Interval 2926 (1462500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3342 1 episodes - episode_reward: -659.830 [-659.830, -659.830] - loss: 7352.843 - mae: 1559.048 - mean_q: 2106.700 Interval 2927 (1463000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7796 Interval 2928 (1463500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1147 3 episodes - episode_reward: -499.628 [-925.197, -139.844] - loss: 8006.799 - mae: 1562.847 - mean_q: 2112.138 Interval 2929 (1464000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3162 3 episodes - episode_reward: -622.301 [-1219.136, -130.518] - loss: 6981.870 - mae: 1579.753 - mean_q: 2135.489 Interval 2930 (1464500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2724 1 episodes - episode_reward: -672.114 [-672.114, -672.114] - loss: 7333.873 - mae: 1584.255 - mean_q: 2137.228 Interval 2931 (1465000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5896 2 episodes - episode_reward: -464.062 [-629.674, -298.450] - loss: 7568.821 - mae: 1561.934 - mean_q: 2103.980 Interval 2932 (1465500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7461 Interval 2933 (1466000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5745 2 episodes - episode_reward: -670.783 [-894.701, -446.866] - loss: 8509.093 - mae: 1522.973 - mean_q: 2045.724 Interval 2934 (1466500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1983 2 episodes - episode_reward: -352.616 [-503.854, -201.377] - loss: 9365.546 - mae: 1500.959 - mean_q: 2015.300 Interval 2935 (1467000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3683 6 episodes - 
episode_reward: -341.026 [-934.125, -100.000] - loss: 20114.244 - mae: 1484.119 - mean_q: 1990.183 Interval 2936 (1467500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9519 6 episodes - episode_reward: -224.302 [-410.429, -147.991] - loss: 10137.846 - mae: 1468.666 - mean_q: 1972.248 Interval 2937 (1468000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4588 5 episodes - episode_reward: -349.744 [-486.949, -180.001] - loss: 20135.920 - mae: 1461.316 - mean_q: 1960.054 Interval 2938 (1468500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2379 4 episodes - episode_reward: -385.704 [-697.918, -148.089] - loss: 17110.002 - mae: 1437.286 - mean_q: 1927.094 Interval 2939 (1469000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.9395 6 episodes - episode_reward: -369.555 [-619.847, -100.000] - loss: 16233.137 - mae: 1414.239 - mean_q: 1896.839 Interval 2940 (1469500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6442 6 episodes - episode_reward: -294.393 [-508.900, -100.468] - loss: 40107.094 - mae: 1459.273 - mean_q: 1957.806 Interval 2941 (1470000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0216 7 episodes - episode_reward: -293.719 [-495.926, -103.171] - loss: 24752.893 - mae: 1499.186 - mean_q: 2016.829 Interval 2942 (1470500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.5005 5 episodes - episode_reward: -206.407 [-363.700, -69.216] - loss: 25090.848 - mae: 1546.048 - mean_q: 2082.927 Interval 2943 (1471000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0652: 0s - reward: - 7 episodes - episode_reward: -314.366 [-468.592, -100.000] - loss: 29498.867 - mae: 1634.205 - mean_q: 2201.028 Interval 2944 (1471500 steps performed) 500/500 [==============================] - 3s 
5ms/step - reward: -3.2049 5 episodes - episode_reward: -327.038 [-493.423, -87.830] - loss: 45330.820 - mae: 1699.624 - mean_q: 2287.781 Interval 2945 (1472000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.7043 4 episodes - episode_reward: -301.436 [-476.105, -84.409] - loss: 18756.258 - mae: 1759.777 - mean_q: 2379.850 Interval 2946 (1472500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9365 5 episodes - episode_reward: -327.110 [-572.568, -206.652] - loss: 21987.748 - mae: 1799.365 - mean_q: 2436.755 Interval 2947 (1473000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7451 5 episodes - episode_reward: -173.112 [-251.386, -88.585] - loss: 25693.016 - mae: 1918.587 - mean_q: 2611.749 Interval 2948 (1473500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9621 5 episodes - episode_reward: -92.842 [-161.640, -38.129] - loss: 32858.770 - mae: 2155.569 - mean_q: 2947.022 Interval 2949 (1474000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8144 5 episodes - episode_reward: -260.720 [-522.132, -100.000] - loss: 26647.914 - mae: 2357.795 - mean_q: 3229.743 Interval 2950 (1474500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0322 5 episodes - episode_reward: -128.931 [-306.711, -38.727] - loss: 40782.410 - mae: 2635.246 - mean_q: 3610.155 Interval 2951 (1475000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6152 3 episodes - episode_reward: -93.611 [-182.194, 11.404] - loss: 38878.090 - mae: 2902.227 - mean_q: 3983.081 Interval 2952 (1475500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4207 4 episodes - episode_reward: -423.224 [-566.773, -212.151] - loss: 32519.482 - mae: 3258.944 - mean_q: 4463.021 Interval 2953 (1476000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -3.2741 5 episodes - episode_reward: -309.817 [-448.630, -151.829] - loss: 36045.449 - mae: 3558.929 - mean_q: 4864.683 Interval 2954 (1476500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4069 5 episodes - episode_reward: -236.682 [-507.318, -49.415] - loss: 32800.531 - mae: 3760.007 - mean_q: 5116.243 Interval 2955 (1477000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8221 5 episodes - episode_reward: -291.068 [-547.357, -90.310] - loss: 42605.488 - mae: 3935.528 - mean_q: 5334.922 Interval 2956 (1477500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3695 4 episodes - episode_reward: -436.045 [-725.072, -183.421] - loss: 41261.254 - mae: 4006.750 - mean_q: 5417.198 Interval 2957 (1478000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5514 3 episodes - episode_reward: -472.146 [-634.155, -276.543] - loss: 33559.699 - mae: 4102.932 - mean_q: 5533.574 Interval 2958 (1478500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.7288 6 episodes - episode_reward: -432.156 [-619.872, -200.419] - loss: 34610.742 - mae: 4009.446 - mean_q: 5404.638 Interval 2959 (1479000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2788 3 episodes - episode_reward: -468.847 [-608.817, -330.150] - loss: 39945.668 - mae: 3927.958 - mean_q: 5295.824 Interval 2960 (1479500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4183 6 episodes - episode_reward: -429.944 [-874.268, -177.833] - loss: 34756.609 - mae: 3981.425 - mean_q: 5365.189 Interval 2961 (1480000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.5839 4 episodes - episode_reward: -702.529 [-1539.537, -284.622] - loss: 36682.020 - mae: 3936.115 - mean_q: 5304.457 Interval 2962 (1480500 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6578 6 episodes - episode_reward: -306.259 [-535.736, -121.302] - loss: 34209.660 - mae: 3838.530 - mean_q: 5173.867 Interval 2963 (1481000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.7256 6 episodes - episode_reward: -284.463 [-395.906, -100.000] - loss: 29673.162 - mae: 3851.352 - mean_q: 5197.989 Interval 2964 (1481500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5990 4 episodes - episode_reward: -479.213 [-727.230, -293.747] - loss: 28318.801 - mae: 3806.574 - mean_q: 5143.731 Interval 2965 (1482000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.7831 4 episodes - episode_reward: -433.383 [-481.609, -402.098] - loss: 44308.492 - mae: 3785.970 - mean_q: 5122.215 Interval 2966 (1482500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.9694 5 episodes - episode_reward: -432.517 [-644.127, -138.743] - loss: 52604.734 - mae: 3763.441 - mean_q: 5093.984 Interval 2967 (1483000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2597 2 episodes - episode_reward: -326.650 [-473.943, -179.358] - loss: 48538.121 - mae: 3855.910 - mean_q: 5229.312 Interval 2968 (1483500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.9718 3 episodes - episode_reward: -819.283 [-1280.659, -423.394] - loss: 50942.086 - mae: 3899.318 - mean_q: 5282.806 Interval 2969 (1484000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.8716 6 episodes - episode_reward: -397.041 [-729.697, -100.894] - loss: 44509.367 - mae: 3906.638 - mean_q: 5312.413 Interval 2970 (1484500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.7781 3 episodes - episode_reward: -446.229 [-654.235, -302.550] - loss: 47132.195 - mae: 3952.845 - mean_q: 
5381.491 Interval 2971 (1485000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.9308 2 episodes - episode_reward: -1444.662 [-2480.325, -408.998] - loss: 60760.848 - mae: 4082.646 - mean_q: 5561.736 Interval 2972 (1485500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2040 5 episodes - episode_reward: -262.425 [-376.537, -101.640] - loss: 70317.023 - mae: 4169.965 - mean_q: 5675.680 Interval 2973 (1486000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8448 3 episodes - episode_reward: -314.650 [-745.126, -98.826] - loss: 67112.359 - mae: 4223.485 - mean_q: 5752.257 Interval 2974 (1486500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4853 1 episodes - episode_reward: -493.167 [-493.167, -493.167] - loss: 82170.273 - mae: 4372.363 - mean_q: 5977.102 Interval 2975 (1487000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6017 3 episodes - episode_reward: -294.965 [-477.501, -191.262] - loss: 85918.992 - mae: 4458.402 - mean_q: 6100.507 Interval 2976 (1487500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5736 1 episodes - episode_reward: -438.776 [-438.776, -438.776] - loss: 85510.250 - mae: 4640.775 - mean_q: 6368.097 Interval 2977 (1488000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0151 Interval 2978 (1488500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3680 1 episodes - episode_reward: -1515.111 [-1515.111, -1515.111] - loss: 93081.203 - mae: 4907.735 - mean_q: 6726.731 Interval 2979 (1489000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5849 1 episodes - episode_reward: -1298.697 [-1298.697, -1298.697] - loss: 96811.297 - mae: 5032.350 - mean_q: 6882.584 Interval 2980 (1489500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.2830 2 episodes - episode_reward: -757.531 [-1246.071, -268.991] - loss: 98788.383 - mae: 5170.170 - mean_q: 7047.239 Interval 2981 (1490000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8306 Interval 2982 (1490500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5364 4 episodes - episode_reward: -526.581 [-1375.007, -175.126] - loss: 112998.609 - mae: 5114.452 - mean_q: 6920.235 Interval 2983 (1491000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5392 4 episodes - episode_reward: -523.056 [-709.913, -378.618] - loss: 101710.094 - mae: 5032.866 - mean_q: 6804.995 Interval 2984 (1491500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5776 8 episodes - episode_reward: -229.360 [-404.114, -96.971] - loss: 117058.227 - mae: 4906.524 - mean_q: 6634.546 Interval 2985 (1492000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6829 2 episodes - episode_reward: -578.859 [-649.238, -508.480] - loss: 88212.188 - mae: 4805.168 - mean_q: 6512.133 Interval 2986 (1492500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8776 4 episodes - episode_reward: -393.471 [-620.568, -181.394] - loss: 75226.570 - mae: 4836.189 - mean_q: 6561.257 Interval 2987 (1493000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.7004 4 episodes - episode_reward: -461.663 [-779.911, -78.179] - loss: 81359.336 - mae: 4803.796 - mean_q: 6523.439 Interval 2988 (1493500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -6.4098 8 episodes - episode_reward: -407.221 [-705.741, -116.362] - loss: 103593.469 - mae: 4822.354 - mean_q: 6572.081 Interval 2989 (1494000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6797 4 episodes - 
episode_reward: -447.730 [-675.785, -240.648] - loss: 138487.391 - mae: 4860.192 - mean_q: 6623.495 Interval 2990 (1494500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.9615 5 episodes - episode_reward: -444.018 [-687.404, -123.732] - loss: 82467.406 - mae: 4853.900 - mean_q: 6605.221 Interval 2991 (1495000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.7305 6 episodes - episode_reward: -442.213 [-717.783, -124.932] - loss: 108888.719 - mae: 4932.597 - mean_q: 6702.541 Interval 2992 (1495500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5497 3 episodes - episode_reward: -239.964 [-406.468, -118.982] - loss: 89074.844 - mae: 4837.476 - mean_q: 6566.461 Interval 2993 (1496000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.3011 3 episodes - episode_reward: -856.806 [-1312.065, -125.623] - loss: 111131.984 - mae: 4716.079 - mean_q: 6385.274 Interval 2994 (1496500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.8763 3 episodes - episode_reward: -692.559 [-904.065, -449.981] - loss: 74774.781 - mae: 4546.965 - mean_q: 6161.225 Interval 2995 (1497000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5685 3 episodes - episode_reward: -335.327 [-615.303, -171.831] - loss: 78789.859 - mae: 4409.291 - mean_q: 5985.329 Interval 2996 (1497500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.7225 4 episodes - episode_reward: -538.245 [-633.765, -319.865] - loss: 66452.859 - mae: 4375.841 - mean_q: 5952.610 Interval 2997 (1498000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3666 2 episodes - episode_reward: -648.186 [-668.595, -627.777] - loss: 68380.617 - mae: 4442.053 - mean_q: 6060.312 Interval 2998 (1498500 steps performed) 500/500 [==============================] - 3s 5ms/step - 
reward: -3.4219 5 episodes - episode_reward: -392.685 [-569.861, -106.692] - loss: 76397.664 - mae: 4440.408 - mean_q: 6083.547 Interval 2999 (1499000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3650 4 episodes - episode_reward: -427.378 [-577.398, -180.452] - loss: 91979.805 - mae: 4571.568 - mean_q: 6289.523 Interval 3000 (1499500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.8628 4 episodes - episode_reward: -403.194 [-572.652, -251.622] - loss: 94914.250 - mae: 4711.369 - mean_q: 6490.923 Interval 3001 (1500000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5554 7 episodes - episode_reward: -310.037 [-643.276, -99.522] - loss: 128486.062 - mae: 4812.234 - mean_q: 6643.202 Interval 3002 (1500500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.8422 5 episodes - episode_reward: -399.625 [-680.140, -100.000] - loss: 122387.070 - mae: 4997.098 - mean_q: 6893.619 Interval 3003 (1501000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2621 5 episodes - episode_reward: -415.903 [-664.862, -129.953] - loss: 109828.750 - mae: 5042.924 - mean_q: 6949.760 Interval 3004 (1501500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2750 3 episodes - episode_reward: -380.643 [-794.584, -156.278] - loss: 101007.844 - mae: 5124.321 - mean_q: 7046.097 Interval 3005 (1502000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0127 2 episodes - episode_reward: -1197.010 [-1340.482, -1053.537] - loss: 116825.375 - mae: 5140.337 - mean_q: 7057.545 Interval 3006 (1502500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3612 1 episodes - episode_reward: -1277.812 [-1277.812, -1277.812] - loss: 114260.953 - mae: 5062.496 - mean_q: 6938.851 Interval 3007 (1503000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.7896 2 episodes - episode_reward: -586.806 [-1058.350, -115.263] - loss: 117362.922 - mae: 5032.400 - mean_q: 6884.441 Interval 3008 (1503500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0111 4 episodes - episode_reward: -665.994 [-1296.301, -64.967] - loss: 127140.812 - mae: 5005.122 - mean_q: 6845.349 Interval 3009 (1504000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2436 1 episodes - episode_reward: -951.351 [-951.351, -951.351] - loss: 95883.070 - mae: 5072.019 - mean_q: 6943.515 Interval 3010 (1504500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6789 3 episodes - episode_reward: -739.987 [-1208.497, -141.522] - loss: 120581.086 - mae: 5212.840 - mean_q: 7140.956 Interval 3011 (1505000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1781 3 episodes - episode_reward: -608.561 [-989.796, -141.896] - loss: 101229.805 - mae: 5158.041 - mean_q: 7068.080 Interval 3012 (1505500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0073 3 episodes - episode_reward: -676.038 [-1058.944, -112.707] - loss: 123583.484 - mae: 5355.094 - mean_q: 7340.404 Interval 3013 (1506000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0016 2 episodes - episode_reward: -1031.630 [-1055.665, -1007.595] - loss: 130395.711 - mae: 5415.651 - mean_q: 7414.254 Interval 3014 (1506500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5936 1 episodes - episode_reward: -940.944 [-940.944, -940.944] - loss: 126744.133 - mae: 5489.906 - mean_q: 7501.171 Interval 3015 (1507000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3510 6 episodes - episode_reward: -342.364 [-1003.593, -34.502] - loss: 157173.141 - mae: 5463.412 - mean_q: 7448.806 Interval 
3016 (1507500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.7836 2 episodes - episode_reward: -906.760 [-937.866, -875.654] - loss: 128626.969 - mae: 5532.091 - mean_q: 7527.925 Interval 3017 (1508000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3131 3 episodes - episode_reward: -531.313 [-858.016, -56.671] - loss: 131872.844 - mae: 5550.176 - mean_q: 7540.346 Interval 3018 (1508500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7947 2 episodes - episode_reward: -584.175 [-911.422, -256.928] - loss: 154723.922 - mae: 5563.466 - mean_q: 7534.404 Interval 3019 (1509000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5674 3 episodes - episode_reward: -367.474 [-552.755, -113.414] - loss: 141916.625 - mae: 5414.271 - mean_q: 7329.209 Interval 3020 (1509500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7258 2 episodes - episode_reward: -619.660 [-893.392, -345.927] - loss: 127736.156 - mae: 5468.226 - mean_q: 7410.231 Interval 3021 (1510000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4319 2 episodes - episode_reward: -507.578 [-953.950, -61.205] - loss: 146023.609 - mae: 5480.767 - mean_q: 7418.314 Interval 3022 (1510500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6939 1 episodes - episode_reward: -679.992 [-679.992, -679.992] - loss: 117051.648 - mae: 5368.071 - mean_q: 7248.063 Interval 3023 (1511000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9041 2 episodes - episode_reward: -458.642 [-829.171, -88.114] - loss: 117177.773 - mae: 5270.239 - mean_q: 7105.267 Interval 3024 (1511500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4001 1 episodes - episode_reward: -713.805 [-713.805, -713.805] - loss: 100434.859 - mae: 
5162.590 - mean_q: 6946.735 Interval 3025 (1512000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9931 2 episodes - episode_reward: -547.564 [-844.250, -250.878] - loss: 123375.523 - mae: 5016.478 - mean_q: 6742.148 Interval 3026 (1512500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4448 1 episodes - episode_reward: -808.435 [-808.435, -808.435] - loss: 92697.906 - mae: 4871.239 - mean_q: 6547.915 Interval 3027 (1513000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7029 1 episodes - episode_reward: -689.769 [-689.769, -689.769] - loss: 87254.211 - mae: 4667.833 - mean_q: 6283.879 Interval 3028 (1513500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8318 4 episodes - episode_reward: -122.553 [-204.964, -12.794] - loss: 91422.969 - mae: 4439.662 - mean_q: 5967.729 Interval 3029 (1514000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3176 3 episodes - episode_reward: -330.649 [-481.719, -119.104] - loss: 82993.352 - mae: 4369.008 - mean_q: 5872.929 Interval 3030 (1514500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7115 3 episodes - episode_reward: -305.702 [-646.442, -121.187] - loss: 117853.414 - mae: 4252.542 - mean_q: 5706.607 Interval 3031 (1515000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8268 3 episodes - episode_reward: -342.138 [-565.786, -89.329] - loss: 112145.117 - mae: 4184.882 - mean_q: 5608.608 Interval 3032 (1515500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9614 2 episodes - episode_reward: -213.720 [-330.139, -97.301] - loss: 82462.945 - mae: 4123.021 - mean_q: 5525.249 Interval 3033 (1516000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8401 5 episodes - episode_reward: -181.332 [-310.948, -38.324] 
- loss: 113148.812 - mae: 4012.629 - mean_q: 5370.975 Interval 3034 (1516500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6373 1 episodes - episode_reward: -220.767 [-220.767, -220.767] - loss: 83762.039 - mae: 3976.998 - mean_q: 5326.745 Interval 3035 (1517000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9644 3 episodes - episode_reward: -374.827 [-683.478, -113.282] - loss: 106327.656 - mae: 3931.529 - mean_q: 5260.313 Interval 3036 (1517500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2516 3 episodes - episode_reward: -379.752 [-645.726, -183.059] - loss: 90876.922 - mae: 3961.707 - mean_q: 5305.274 Interval 3037 (1518000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8514 1 episodes - episode_reward: -80.770 [-80.770, -80.770] - loss: 155964.812 - mae: 3975.488 - mean_q: 5326.979 Interval 3038 (1518500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7671 4 episodes - episode_reward: -296.699 [-440.464, -51.041] - loss: 95107.555 - mae: 3964.034 - mean_q: 5318.543 Interval 3039 (1519000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6236 2 episodes - episode_reward: -198.558 [-277.052, -120.063] - loss: 97180.742 - mae: 3913.540 - mean_q: 5261.400 Interval 3040 (1519500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2142 2 episodes - episode_reward: -16.845 [-49.005, 15.316] - loss: 138980.500 - mae: 4059.498 - mean_q: 5466.689 Interval 3041 (1520000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7627 2 episodes - episode_reward: -424.287 [-480.004, -368.570] - loss: 110031.961 - mae: 4170.825 - mean_q: 5633.175 Interval 3042 (1520500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3149 5 episodes - episode_reward: 
-243.954 [-515.016, -10.849] - loss: 127296.266 - mae: 4328.784 - mean_q: 5877.130 Interval 3043 (1521000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2077 4 episodes - episode_reward: -146.080 [-457.157, -3.273] - loss: 113596.570 - mae: 4537.849 - mean_q: 6184.695 Interval 3044 (1521500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2857 Interval 3045 (1522000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1109 Interval 3046 (1522500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6185 4 episodes - episode_reward: -363.118 [-576.084, -100.000] - loss: 235006.297 - mae: 5104.843 - mean_q: 6953.325 Interval 3047 (1523000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6350 2 episodes - episode_reward: -132.080 [-158.790, -105.370] - loss: 128858.727 - mae: 5181.991 - mean_q: 7065.016 Interval 3048 (1523500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8063 5 episodes - episode_reward: -210.046 [-285.622, -132.003] - loss: 163847.203 - mae: 5246.859 - mean_q: 7131.911 Interval 3049 (1524000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6976 5 episodes - episode_reward: -158.643 [-441.588, -31.065] - loss: 161136.078 - mae: 5283.104 - mean_q: 7182.171 Interval 3050 (1524500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5136 3 episodes - episode_reward: -94.382 [-109.520, -65.308] - loss: 115715.398 - mae: 5266.646 - mean_q: 7151.680 Interval 3051 (1525000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1667 Interval 3052 (1525500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0979 Interval 3053 (1526000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.0756 
Interval 3054 (1526500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0623 1 episodes - episode_reward: 83.276 [83.276, 83.276] - loss: 127521.961 - mae: 5160.581 - mean_q: 7006.635 Interval 3055 (1527000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7252 Interval 3056 (1527500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7684 2 episodes - episode_reward: -554.171 [-924.789, -183.552] - loss: 161510.797 - mae: 5124.710 - mean_q: 6942.562 Interval 3057 (1528000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6490 2 episodes - episode_reward: -753.412 [-1252.932, -253.891] - loss: 100954.719 - mae: 5122.889 - mean_q: 6941.318 Interval 3058 (1528500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4525 3 episodes - episode_reward: -723.805 [-1494.135, -330.003] - loss: 108896.734 - mae: 5002.027 - mean_q: 6769.328 Interval 3059 (1529000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5805 2 episodes - episode_reward: -357.355 [-450.921, -263.789] - loss: 98781.039 - mae: 5031.053 - mean_q: 6808.410 Interval 3060 (1529500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.9420 3 episodes - episode_reward: -651.982 [-1207.813, -288.285] - loss: 158164.906 - mae: 4848.771 - mean_q: 6539.430 Interval 3061 (1530000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5520 3 episodes - episode_reward: -628.430 [-1235.360, -279.465] - loss: 87174.922 - mae: 4724.589 - mean_q: 6363.650 Interval 3062 (1530500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8699 Interval 3063 (1531000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2371 1 episodes - episode_reward: -1789.311 [-1789.311, -1789.311] - loss: 76672.648 - 
mae: 4457.808 - mean_q: 5996.092 Interval 3064 (1531500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5529 3 episodes - episode_reward: -536.343 [-720.794, -167.825] - loss: 66289.602 - mae: 4324.678 - mean_q: 5811.896 Interval 3065 (1532000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.6532 5 episodes - episode_reward: -231.371 [-324.186, -175.609] - loss: 77400.664 - mae: 4246.024 - mean_q: 5701.563 Interval 3066 (1532500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1639 4 episodes - episode_reward: -303.087 [-506.600, -139.072] - loss: 61779.422 - mae: 4178.081 - mean_q: 5609.809 Interval 3067 (1533000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3135 3 episodes - episode_reward: -187.653 [-258.124, -143.323] - loss: 81380.797 - mae: 4068.039 - mean_q: 5463.133 Interval 3068 (1533500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2882 2 episodes - episode_reward: -556.653 [-952.973, -160.333] - loss: 61552.383 - mae: 3981.884 - mean_q: 5352.100 Interval 3069 (1534000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2242 5 episodes - episode_reward: -345.845 [-485.468, -133.331] - loss: 52410.828 - mae: 3938.059 - mean_q: 5305.335 Interval 3070 (1534500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0772 4 episodes - episode_reward: -379.198 [-481.078, -229.279] - loss: 77509.086 - mae: 3930.487 - mean_q: 5294.400 Interval 3071 (1535000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1485 4 episodes - episode_reward: -413.951 [-547.869, -194.068] - loss: 53262.285 - mae: 3975.782 - mean_q: 5380.436 Interval 3072 (1535500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6420 4 episodes - episode_reward: -320.954 [-456.525, 
-230.264] - loss: 63335.562 - mae: 4188.051 - mean_q: 5677.032 Interval 3073 (1536000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.9462 4 episodes - episode_reward: -359.845 [-426.675, -211.833] - loss: 70540.430 - mae: 4286.929 - mean_q: 5830.805 Interval 3074 (1536500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3739 5 episodes - episode_reward: -328.836 [-454.787, -265.083] - loss: 117249.953 - mae: 4524.442 - mean_q: 6163.361 Interval 3075 (1537000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4317 6 episodes - episode_reward: -384.361 [-625.905, -124.820] - loss: 70705.773 - mae: 4779.240 - mean_q: 6521.199 Interval 3076 (1537500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6943 4 episodes - episode_reward: -169.664 [-301.451, -107.044] - loss: 88293.906 - mae: 4942.218 - mean_q: 6731.418 Interval 3077 (1538000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0367 2 episodes - episode_reward: -237.017 [-342.525, -131.509] - loss: 91744.016 - mae: 5118.417 - mean_q: 6959.650 Interval 3078 (1538500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8048 2 episodes - episode_reward: -501.608 [-520.719, -482.498] - loss: 91066.539 - mae: 5234.458 - mean_q: 7106.916 Interval 3079 (1539000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2894 4 episodes - episode_reward: -283.094 [-343.497, -222.434] - loss: 88191.844 - mae: 5132.587 - mean_q: 6967.235 Interval 3080 (1539500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4346 Interval 3081 (1540000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8052 1 episodes - episode_reward: -703.983 [-703.983, -703.983] - loss: 99174.141 - mae: 5071.537 - mean_q: 6878.707 Interval 3082 (1540500 
steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8542 3 episodes - episode_reward: -307.748 [-470.846, -188.832] - loss: 106884.086 - mae: 5135.543 - mean_q: 6959.813 Interval 3083 (1541000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8931 2 episodes - episode_reward: -272.236 [-278.311, -266.161] - loss: 113393.383 - mae: 5075.818 - mean_q: 6873.143 Interval 3084 (1541500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4466 5 episodes - episode_reward: -238.202 [-524.002, -105.263] - loss: 93165.555 - mae: 5119.474 - mean_q: 6937.690 Interval 3085 (1542000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3620 3 episodes - episode_reward: -898.284 [-1721.816, -383.220] - loss: 91768.352 - mae: 5082.590 - mean_q: 6882.636 Interval 3086 (1542500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.5152 2 episodes - episode_reward: -1341.519 [-1363.073, -1319.964] - loss: 105932.320 - mae: 5215.303 - mean_q: 7063.479 Interval 3087 (1543000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0518 2 episodes - episode_reward: -468.807 [-724.414, -213.199] - loss: 97335.531 - mae: 5212.541 - mean_q: 7062.944 Interval 3088 (1543500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.2751 4 episodes - episode_reward: -562.522 [-1129.207, -132.224] - loss: 126735.094 - mae: 5296.789 - mean_q: 7182.425 Interval 3089 (1544000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3762 1 episodes - episode_reward: -649.012 [-649.012, -649.012] - loss: 115437.648 - mae: 5536.284 - mean_q: 7512.694 Interval 3090 (1544500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4623 3 episodes - episode_reward: -1053.484 [-1667.656, -245.949] - loss: 102566.547 - mae: 5545.654 - 
mean_q: 7538.423 Interval 3091 (1545000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6373 1 episodes - episode_reward: -705.725 [-705.725, -705.725] - loss: 96053.094 - mae: 5715.152 - mean_q: 7761.745 Interval 3092 (1545500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4242 1 episodes - episode_reward: -1098.678 [-1098.678, -1098.678] - loss: 123307.234 - mae: 5868.832 - mean_q: 7974.931 Interval 3093 (1546000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.6609 3 episodes - episode_reward: -871.341 [-1154.866, -312.757] - loss: 115758.812 - mae: 5910.924 - mean_q: 8035.403 Interval 3094 (1546500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.2824 Interval 3095 (1547000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.6490 1 episodes - episode_reward: -2118.808 [-2118.808, -2118.808] - loss: 117368.242 - mae: 6309.797 - mean_q: 8572.635 Interval 3096 (1547500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5293 1 episodes - episode_reward: -1207.416 [-1207.416, -1207.416] - loss: 124970.070 - mae: 6458.583 - mean_q: 8777.772 Interval 3097 (1548000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5621 1 episodes - episode_reward: -1360.937 [-1360.937, -1360.937] - loss: 122298.352 - mae: 6717.709 - mean_q: 9126.769 Interval 3098 (1548500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.3763 2 episodes - episode_reward: -1539.816 [-1613.332, -1466.299] - loss: 146181.844 - mae: 6919.618 - mean_q: 9380.376 Interval 3099 (1549000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9336 Interval 3100 (1549500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0870 2 episodes - episode_reward: -932.721 
[-1313.296, -552.146] - loss: 117800.969 - mae: 6939.839 - mean_q: 9377.100 Interval 3101 (1550000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.2180 1 episodes - episode_reward: -1490.608 [-1490.608, -1490.608] - loss: 111732.203 - mae: 6886.502 - mean_q: 9287.345 Interval 3102 (1550500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.4568 3 episodes - episode_reward: -1162.346 [-1861.617, -139.442] - loss: 104371.641 - mae: 6878.271 - mean_q: 9271.040 Interval 3103 (1551000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3539 1 episodes - episode_reward: -121.743 [-121.743, -121.743] - loss: 114330.820 - mae: 6746.082 - mean_q: 9085.430 Interval 3104 (1551500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5761 Interval 3105 (1552000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4181 3 episodes - episode_reward: -886.277 [-1662.134, -181.257] - loss: 98898.328 - mae: 6581.544 - mean_q: 8872.022 Interval 3106 (1552500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6898 3 episodes - episode_reward: -356.974 [-697.079, -100.000] - loss: 110024.531 - mae: 6533.696 - mean_q: 8804.579 Interval 3107 (1553000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7823 3 episodes - episode_reward: -512.678 [-907.002, -244.104] - loss: 99194.305 - mae: 6325.085 - mean_q: 8524.983 Interval 3108 (1553500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3367 Interval 3109 (1554000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3920 1 episodes - episode_reward: -364.993 [-364.993, -364.993] - loss: 106780.102 - mae: 6232.222 - mean_q: 8389.912 Interval 3110 (1554500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 
-0.3910 1 episodes - episode_reward: -187.249 [-187.249, -187.249] - loss: 100341.125 - mae: 6230.945 - mean_q: 8389.144 Interval 3111 (1555000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3201 1 episodes - episode_reward: -190.783 [-190.783, -190.783] - loss: 115392.367 - mae: 6209.679 - mean_q: 8357.843 Interval 3112 (1555500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3109 4 episodes - episode_reward: -546.140 [-1057.535, -120.312] - loss: 100858.016 - mae: 6208.682 - mean_q: 8362.819 Interval 3113 (1556000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0519 Interval 3114 (1556500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.8570 Interval 3115 (1557000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7655 1 episodes - episode_reward: -645.023 [-645.023, -645.023] - loss: 90050.023 - mae: 6152.725 - mean_q: 8286.702 Interval 3116 (1557500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7069 2 episodes - episode_reward: -518.769 [-536.379, -501.159] - loss: 94396.953 - mae: 6046.386 - mean_q: 8144.042 Interval 3117 (1558000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5882 2 episodes - episode_reward: -376.904 [-653.808, -100.000] - loss: 87123.477 - mae: 6031.022 - mean_q: 8119.616 Interval 3118 (1558500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1346 1 episodes - episode_reward: -200.871 [-200.871, -200.871] - loss: 100090.758 - mae: 5943.354 - mean_q: 7992.214 Interval 3119 (1559000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6909 2 episodes - episode_reward: -736.347 [-989.061, -483.633] - loss: 94661.547 - mae: 5971.996 - mean_q: 8036.292 Interval 3120 (1559500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -2.7500 2 episodes - episode_reward: -557.416 [-636.651, -478.180] - loss: 84957.281 - mae: 5947.892 - mean_q: 8003.063 Interval 3121 (1560000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5741 1 episodes - episode_reward: -645.782 [-645.782, -645.782] - loss: 97004.945 - mae: 5854.606 - mean_q: 7874.856 Interval 3122 (1560500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4044 3 episodes - episode_reward: -767.027 [-1393.143, -248.927] - loss: 104051.617 - mae: 5783.158 - mean_q: 7780.318 Interval 3123 (1561000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3401 3 episodes - episode_reward: -484.076 [-711.561, -304.325] - loss: 106829.539 - mae: 5661.537 - mean_q: 7614.007 Interval 3124 (1561500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5471 4 episodes - episode_reward: -516.358 [-820.551, -322.593] - loss: 83286.977 - mae: 5521.560 - mean_q: 7429.707 Interval 3125 (1562000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7664 4 episodes - episode_reward: -308.932 [-520.163, -156.088] - loss: 84450.438 - mae: 5429.691 - mean_q: 7307.808 Interval 3126 (1562500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3870 2 episodes - episode_reward: -412.936 [-614.873, -211.000] - loss: 109000.078 - mae: 5336.174 - mean_q: 7178.637 Interval 3127 (1563000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5663 4 episodes - episode_reward: -297.473 [-472.672, -104.761] - loss: 95458.000 - mae: 5320.127 - mean_q: 7162.413 Interval 3128 (1563500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1280 4 episodes - episode_reward: -396.217 [-627.989, -100.000] - loss: 111655.930 - mae: 5272.296 - mean_q: 7094.696 Interval 3129 
(1564000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2074 4 episodes - episode_reward: -309.608 [-530.693, -116.920] - loss: 74048.852 - mae: 5220.777 - mean_q: 7032.648 Interval 3130 (1564500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2485 4 episodes - episode_reward: -512.400 [-849.271, -138.254] - loss: 84174.805 - mae: 5206.145 - mean_q: 7010.354 Interval 3131 (1565000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4353 3 episodes - episode_reward: -507.137 [-802.883, -236.288] - loss: 111455.641 - mae: 5143.622 - mean_q: 6922.629 Interval 3132 (1565500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9712 5 episodes - episode_reward: -338.899 [-612.130, -156.916] - loss: 100243.820 - mae: 5170.933 - mean_q: 6967.063 Interval 3133 (1566000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.5480 4 episodes - episode_reward: -439.535 [-780.577, -131.057] - loss: 77081.195 - mae: 5165.002 - mean_q: 6975.241 Interval 3134 (1566500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4140 2 episodes - episode_reward: -604.224 [-698.814, -509.634] - loss: 81190.281 - mae: 5272.924 - mean_q: 7128.907 Interval 3135 (1567000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.4012 3 episodes - episode_reward: -567.807 [-817.262, -288.478] - loss: 90131.414 - mae: 5294.171 - mean_q: 7164.707 Interval 3136 (1567500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2664 3 episodes - episode_reward: -538.624 [-931.231, -299.236] - loss: 94134.180 - mae: 5518.574 - mean_q: 7480.088 Interval 3137 (1568000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6899 4 episodes - episode_reward: -458.030 [-978.884, -250.746] - loss: 79134.633 - mae: 5621.064 - 
mean_q: 7638.866 Interval 3138 (1568500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9795 4 episodes - episode_reward: -226.946 [-316.972, -90.091] - loss: 96945.984 - mae: 5889.661 - mean_q: 8011.468 Interval 3139 (1569000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1136 6 episodes - episode_reward: -208.754 [-328.020, -100.000] - loss: 79109.555 - mae: 6255.231 - mean_q: 8515.624 Interval 3140 (1569500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6561 3 episodes - episode_reward: -407.736 [-641.982, -200.053] - loss: 95538.125 - mae: 6582.967 - mean_q: 8964.814 Interval 3141 (1570000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4067 4 episodes - episode_reward: -198.016 [-301.274, -100.000] - loss: 108493.312 - mae: 7021.200 - mean_q: 9538.152 Interval 3142 (1570500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6314 4 episodes - episode_reward: -144.939 [-175.609, -111.006] - loss: 120584.617 - mae: 7350.399 - mean_q: 9968.991 Interval 3143 (1571000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1493 3 episodes - episode_reward: -254.689 [-368.170, -147.818] - loss: 102744.109 - mae: 7593.772 - mean_q: 10276.588 Interval 3144 (1571500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7710 1 episodes - episode_reward: -218.004 [-218.004, -218.004] - loss: 158112.703 - mae: 7828.537 - mean_q: 10590.824 Interval 3145 (1572000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8285 4 episodes - episode_reward: -155.143 [-266.094, -26.863] - loss: 93240.102 - mae: 7887.433 - mean_q: 10663.057 Interval 3146 (1572500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3967 1 episodes - episode_reward: -199.416 [-199.416, -199.416] - 
loss: 138009.172 - mae: 7962.513 - mean_q: 10756.651 Interval 3147 (1573000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0902 3 episodes - episode_reward: -307.208 [-606.078, -86.218] - loss: 131055.758 - mae: 8081.311 - mean_q: 10923.552 Interval 3148 (1573500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4702 1 episodes - episode_reward: -805.444 [-805.444, -805.444] - loss: 110020.641 - mae: 8207.267 - mean_q: 11081.502 Interval 3149 (1574000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0757 2 episodes - episode_reward: -200.729 [-202.500, -198.958] - loss: 124543.828 - mae: 8308.811 - mean_q: 11208.046 Interval 3150 (1574500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0073 1 episodes - episode_reward: -350.634 [-350.634, -350.634] - loss: 131373.766 - mae: 8283.521 - mean_q: 11168.489 Interval 3151 (1575000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2681 3 episodes - episode_reward: -439.478 [-643.444, -267.580] - loss: 162048.922 - mae: 8377.010 - mean_q: 11286.481 Interval 3152 (1575500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6218 3 episodes - episode_reward: -454.896 [-578.978, -323.700] - loss: 178492.359 - mae: 8404.082 - mean_q: 11322.364 Interval 3153 (1576000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0010 3 episodes - episode_reward: -268.973 [-449.094, -99.494] - loss: 107812.680 - mae: 8297.819 - mean_q: 11197.537 Interval 3154 (1576500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1893 4 episodes - episode_reward: -599.225 [-1644.622, -156.559] - loss: 127095.750 - mae: 8355.251 - mean_q: 11266.812 Interval 3155 (1577000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5641 2 episodes - 
episode_reward: -364.916 [-432.410, -297.423] - loss: 150209.938 - mae: 7988.609 - mean_q: 10782.622 Interval 3156 (1577500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -1.3443 2 episodes - episode_reward: -287.464 [-346.702, -228.226] - loss: 225706.812 - mae: 8049.049 - mean_q: 10869.768 Interval 3157 (1578000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5883 3 episodes - episode_reward: -414.110 [-777.120, -223.216] - loss: 127422.594 - mae: 7924.763 - mean_q: 10718.396 Interval 3158 (1578500 steps performed) 500/500 [==============================] - 388s 777ms/step - reward: -1.3617 2 episodes - episode_reward: -208.163 [-398.412, -17.914] - loss: 180754.750 - mae: 7831.998 - mean_q: 10583.752 Interval 3159 (1579000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5678 4 episodes - episode_reward: -420.209 [-556.676, -136.180] - loss: 220627.062 - mae: 7704.466 - mean_q: 10421.817 Interval 3160 (1579500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5073 2 episodes - episode_reward: -295.459 [-330.910, -260.008] - loss: 170290.266 - mae: 7792.078 - mean_q: 10544.022 Interval 3161 (1580000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1991 2 episodes - episode_reward: -484.469 [-534.773, -434.165] - loss: 164091.094 - mae: 7631.326 - mean_q: 10339.247 Interval 3162 (1580500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2087 2 episodes - episode_reward: -682.522 [-771.019, -594.025] - loss: 175082.703 - mae: 7446.151 - mean_q: 10085.984 Interval 3163 (1581000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1824 1 episodes - episode_reward: -563.062 [-563.062, -563.062] - loss: 170965.891 - mae: 7326.761 - mean_q: 9938.710 Interval 3164 (1581500 steps performed) 500/500 [==============================] 
- 3s 6ms/step - reward: -3.4006 3 episodes - episode_reward: -565.387 [-718.449, -417.885] - loss: 155101.031 - mae: 7330.803 - mean_q: 9947.263 Interval 3165 (1582000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.9395 4 episodes - episode_reward: -510.257 [-634.054, -328.355] - loss: 143363.859 - mae: 7249.994 - mean_q: 9853.065 Interval 3166 (1582500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.0166 3 episodes - episode_reward: -675.532 [-718.059, -629.549] - loss: 199534.266 - mae: 7330.342 - mean_q: 9962.537 Interval 3167 (1583000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4796 3 episodes - episode_reward: -368.054 [-507.531, -198.436] - loss: 171840.500 - mae: 7364.966 - mean_q: 10020.532 Interval 3168 (1583500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7848 3 episodes - episode_reward: -506.556 [-591.920, -375.108] - loss: 197903.594 - mae: 7423.538 - mean_q: 10095.545 Interval 3169 (1584000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6333 2 episodes - episode_reward: -571.890 [-586.040, -557.740] - loss: 219662.359 - mae: 7369.486 - mean_q: 10035.190 Interval 3170 (1584500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5021 3 episodes - episode_reward: -642.423 [-702.267, -545.868] - loss: 231846.172 - mae: 7578.536 - mean_q: 10347.673 Interval 3171 (1585000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0246 2 episodes - episode_reward: -738.914 [-838.853, -638.975] - loss: 302312.375 - mae: 7663.907 - mean_q: 10462.448 Interval 3172 (1585500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4318 2 episodes - episode_reward: -532.390 [-1003.739, -61.042] - loss: 222369.109 - mae: 7911.993 - mean_q: 10830.052 Interval 3173 (1586000 steps performed) 
500/500 [==============================] - 3s 7ms/step - reward: -3.8406 4 episodes - episode_reward: -500.385 [-876.821, -141.174] - loss: 258340.984 - mae: 8202.193 - mean_q: 11214.138 Interval 3174 (1586500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9713 2 episodes - episode_reward: -910.117 [-968.589, -851.645] - loss: 333954.125 - mae: 8582.433 - mean_q: 11742.762 Interval 3175 (1587000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.7131 1 episodes - episode_reward: -1332.049 [-1332.049, -1332.049] - loss: 277880.719 - mae: 8823.132 - mean_q: 12077.758 Interval 3176 (1587500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.9195 4 episodes - episode_reward: -346.871 [-712.142, -151.142] - loss: 188891.859 - mae: 9202.343 - mean_q: 12624.947 Interval 3177 (1588000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.1899 3 episodes - episode_reward: -823.762 [-1147.119, -191.764] - loss: 546038.625 - mae: 9722.485 - mean_q: 13340.631 Interval 3178 (1588500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.9744 1 episodes - episode_reward: -1199.805 [-1199.805, -1199.805] - loss: 502252.188 - mae: 10226.006 - mean_q: 14059.240 Interval 3179 (1589000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.9779 2 episodes - episode_reward: -1007.890 [-1684.413, -331.366] - loss: 472422.562 - mae: 10828.517 - mean_q: 14891.233 Interval 3180 (1589500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.4738 3 episodes - episode_reward: -816.650 [-1318.999, -130.009] - loss: 438569.000 - mae: 11591.275 - mean_q: 15937.268 Interval 3181 (1590000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5343 3 episodes - episode_reward: -575.352 [-1213.374, -248.582] - loss: 374087.219 - mae: 12007.965 - 
mean_q: 16489.367 Interval 3182 (1590500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1019 1 episodes - episode_reward: -999.455 [-999.455, -999.455] - loss: 566852.812 - mae: 12646.277 - mean_q: 17328.176 Interval 3183 (1591000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6104 1 episodes - episode_reward: -1113.596 [-1113.596, -1113.596] - loss: 540715.188 - mae: 13170.014 - mean_q: 18033.523 Interval 3184 (1591500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9276 2 episodes - episode_reward: -652.032 [-1023.053, -281.011] - loss: 675616.375 - mae: 13467.707 - mean_q: 18414.670 Interval 3185 (1592000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.8168 2 episodes - episode_reward: -1429.917 [-1782.959, -1076.874] - loss: 462247.969 - mae: 13607.767 - mean_q: 18604.723 Interval 3186 (1592500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8236 Interval 3187 (1593000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0510 1 episodes - episode_reward: -1228.346 [-1228.346, -1228.346] - loss: 729202.562 - mae: 13779.462 - mean_q: 18797.797 Interval 3188 (1593500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6896 3 episodes - episode_reward: -519.047 [-1081.359, -180.198] - loss: 411298.812 - mae: 13870.076 - mean_q: 18934.100 Interval 3189 (1594000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.0397 1 episodes - episode_reward: -1464.493 [-1464.493, -1464.493] - loss: 565836.875 - mae: 14079.231 - mean_q: 19203.840 Interval 3190 (1594500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1855 1 episodes - episode_reward: -1393.315 [-1393.315, -1393.315] - loss: 415282.469 - mae: 14169.426 - mean_q: 19344.078 Interval 3191 (1595000 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3335 1 episodes - episode_reward: -1126.072 [-1126.072, -1126.072] - loss: 553072.938 - mae: 14150.071 - mean_q: 19294.047 Interval 3192 (1595500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4010 2 episodes - episode_reward: -758.276 [-1429.316, -87.237] - loss: 607532.625 - mae: 14532.671 - mean_q: 19823.531 Interval 3193 (1596000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2617 Interval 3194 (1596500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0620 1 episodes - episode_reward: -1270.480 [-1270.480, -1270.480] - loss: 530722.062 - mae: 14556.981 - mean_q: 19844.129 Interval 3195 (1597000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0603 1 episodes - episode_reward: -1127.391 [-1127.391, -1127.391] - loss: 693420.500 - mae: 14904.812 - mean_q: 20299.990 Interval 3196 (1597500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8380 2 episodes - episode_reward: -677.327 [-1204.316, -150.338] - loss: 567447.312 - mae: 14815.722 - mean_q: 20199.609 Interval 3197 (1598000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0859 1 episodes - episode_reward: -1465.791 [-1465.791, -1465.791] - loss: 586184.062 - mae: 14990.772 - mean_q: 20432.771 Interval 3198 (1598500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.2552 1 episodes - episode_reward: -1371.562 [-1371.562, -1371.562] - loss: 439425.594 - mae: 15215.856 - mean_q: 20748.211 Interval 3199 (1599000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7839 1 episodes - episode_reward: -1379.316 [-1379.316, -1379.316] - loss: 545593.188 - mae: 15160.265 - mean_q: 20646.711 Interval 3200 (1599500 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -4.4635 1 episodes - episode_reward: -2289.900 [-2289.900, -2289.900] - loss: 384864.562 - mae: 15155.893 - mean_q: 20664.145 Interval 3201 (1600000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7903 2 episodes - episode_reward: -490.033 [-846.589, -133.476] - loss: 500457.531 - mae: 15225.345 - mean_q: 20753.363 Interval 3202 (1600500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.0242 1 episodes - episode_reward: -1923.319 [-1923.319, -1923.319] - loss: 453126.312 - mae: 15063.533 - mean_q: 20524.246 Interval 3203 (1601000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9157 1 episodes - episode_reward: -1476.093 [-1476.093, -1476.093] - loss: 329628.062 - mae: 15150.540 - mean_q: 20671.840 Interval 3204 (1601500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9743 3 episodes - episode_reward: -400.803 [-895.344, -139.587] - loss: 364445.688 - mae: 15223.543 - mean_q: 20771.844 Interval 3205 (1602000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.1185 2 episodes - episode_reward: -1501.985 [-1955.800, -1048.170] - loss: 251813.031 - mae: 15399.904 - mean_q: 21033.467 Interval 3206 (1602500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.1805 1 episodes - episode_reward: -782.288 [-782.288, -782.288] - loss: 410697.125 - mae: 15703.582 - mean_q: 21439.215 Interval 3207 (1603000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.8709 2 episodes - episode_reward: -1621.968 [-2098.281, -1145.655] - loss: 369829.719 - mae: 15769.802 - mean_q: 21517.416 Interval 3208 (1603500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0863 2 episodes - episode_reward: -595.371 [-817.026, -373.716] - loss: 341735.562 - mae: 15918.024 
- mean_q: 21742.240 Interval 3209 (1604000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.2379 2 episodes - episode_reward: -1473.752 [-1740.900, -1206.604] - loss: 386625.656 - mae: 16209.452 - mean_q: 22128.756 Interval 3210 (1604500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.2974 1 episodes - episode_reward: -1523.150 [-1523.150, -1523.150] - loss: 371037.156 - mae: 16304.720 - mean_q: 22248.980 Interval 3211 (1605000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -4.8204 2 episodes - episode_reward: -1195.637 [-1416.036, -975.238] - loss: 389425.438 - mae: 16516.943 - mean_q: 22546.600 Interval 3212 (1605500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.5020 1 episodes - episode_reward: -1298.640 [-1298.640, -1298.640] - loss: 295694.375 - mae: 16681.762 - mean_q: 22781.080 Interval 3213 (1606000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0572 3 episodes - episode_reward: -1041.593 [-1356.139, -783.756] - loss: 380440.125 - mae: 16964.535 - mean_q: 23153.609 Interval 3214 (1606500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.8181 2 episodes - episode_reward: -952.146 [-1022.126, -882.167] - loss: 426826.344 - mae: 16840.514 - mean_q: 22986.816 Interval 3215 (1607000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3777 3 episodes - episode_reward: -327.050 [-607.835, -114.548] - loss: 349286.125 - mae: 16733.826 - mean_q: 22848.424 Interval 3216 (1607500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2147 4 episodes - episode_reward: -582.791 [-880.497, -69.060] - loss: 400399.844 - mae: 16954.377 - mean_q: 23144.559 Interval 3217 (1608000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0373 3 episodes - episode_reward: 
-489.635 [-805.937, -175.339] - loss: 365413.031 - mae: 16745.184 - mean_q: 22857.344 Interval 3218 (1608500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.5525 2 episodes - episode_reward: -1007.232 [-1219.860, -794.604] - loss: 410024.375 - mae: 16751.951 - mean_q: 22846.078 Interval 3219 (1609000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.6614 2 episodes - episode_reward: -852.649 [-930.026, -775.272] - loss: 329162.094 - mae: 16577.689 - mean_q: 22616.676 Interval 3220 (1609500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.7664 3 episodes - episode_reward: -709.919 [-962.448, -451.800] - loss: 282366.531 - mae: 16484.123 - mean_q: 22453.098 Interval 3221 (1610000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2195 2 episodes - episode_reward: -893.018 [-893.825, -892.211] - loss: 358479.625 - mae: 16496.598 - mean_q: 22435.111 Interval 3222 (1610500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4881 1 episodes - episode_reward: -664.847 [-664.847, -664.847] - loss: 430101.281 - mae: 16089.228 - mean_q: 21833.271 Interval 3223 (1611000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4727 1 episodes - episode_reward: -845.361 [-845.361, -845.361] - loss: 319399.688 - mae: 15800.163 - mean_q: 21422.688 Interval 3224 (1611500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9216 Interval 3225 (1612000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5064 1 episodes - episode_reward: -866.615 [-866.615, -866.615] - loss: 334483.562 - mae: 15144.940 - mean_q: 20507.820 Interval 3226 (1612500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2827 Interval 3227 (1613000 steps performed) 500/500 [==============================] - 4s 
8ms/step - reward: -1.4460 1 episodes - episode_reward: -1187.771 [-1187.771, -1187.771] - loss: 340912.000 - mae: 14727.033 - mean_q: 19972.312 Interval 3228 (1613500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0358 1 episodes - episode_reward: -497.303 [-497.303, -497.303] - loss: 302883.562 - mae: 14551.398 - mean_q: 19744.482 Interval 3229 (1614000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.8980 2 episodes - episode_reward: -483.570 [-869.299, -97.841] - loss: 309026.969 - mae: 14429.110 - mean_q: 19597.805 Interval 3230 (1614500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5086 1 episodes - episode_reward: -813.953 [-813.953, -813.953] - loss: 354652.781 - mae: 14622.483 - mean_q: 19869.156 Interval 3231 (1615000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9609 2 episodes - episode_reward: -539.704 [-960.829, -118.578] - loss: 296280.688 - mae: 14522.832 - mean_q: 19762.363 Interval 3232 (1615500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1598 2 episodes - episode_reward: -943.075 [-1005.108, -881.041] - loss: 247608.969 - mae: 14669.577 - mean_q: 19986.123 Interval 3233 (1616000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9358 4 episodes - episode_reward: -195.361 [-352.170, -94.214] - loss: 350808.625 - mae: 14942.700 - mean_q: 20347.564 Interval 3234 (1616500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6614 2 episodes - episode_reward: -509.477 [-826.987, -191.967] - loss: 360587.781 - mae: 15094.901 - mean_q: 20556.340 Interval 3235 (1617000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7209 2 episodes - episode_reward: -450.838 [-806.520, -95.155] - loss: 379891.750 - mae: 15453.289 - mean_q: 21030.738 Interval 3236 (1617500 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6088 Interval 3237 (1618000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9846 2 episodes - episode_reward: -341.194 [-395.611, -286.777] - loss: 389498.938 - mae: 16015.434 - mean_q: 21755.789 Interval 3238 (1618500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3507 3 episodes - episode_reward: -253.264 [-490.397, -119.630] - loss: 368483.688 - mae: 16312.797 - mean_q: 22173.207 Interval 3239 (1619000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3705 2 episodes - episode_reward: -104.924 [-115.725, -94.124] - loss: 339377.062 - mae: 16548.811 - mean_q: 22471.229 Interval 3240 (1619500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.7952 2 episodes - episode_reward: -123.369 [-141.155, -105.582] - loss: 407157.375 - mae: 16764.047 - mean_q: 22685.982 Interval 3241 (1620000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8606 2 episodes - episode_reward: -279.907 [-307.056, -252.757] - loss: 317669.281 - mae: 16685.195 - mean_q: 22575.393 Interval 3242 (1620500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2001 Interval 3243 (1621000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1623 Interval 3244 (1621500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1127 Interval 3245 (1622000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1490 Interval 3246 (1622500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1674 Interval 3247 (1623000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.0859 Interval 3248 (1623500 steps performed) 500/500 [==============================] - 5s 9ms/step - 
reward: -0.0469 Interval 3249 (1624000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 0.1463 3 episodes - episode_reward: -143.346 [-375.555, 12.848] - loss: 366350.375 - mae: 16411.877 - mean_q: 22124.980 Interval 3250 (1624500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3719 Interval 3251 (1625000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.6561 3 episodes - episode_reward: -172.972 [-262.441, -76.912] - loss: 336321.875 - mae: 16022.200 - mean_q: 21599.266 Interval 3252 (1625500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2583 2 episodes - episode_reward: -59.684 [-88.235, -31.133] - loss: 390345.250 - mae: 16027.438 - mean_q: 21614.736 Interval 3253 (1626000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5633 4 episodes - episode_reward: -182.698 [-253.264, -102.023] - loss: 426764.500 - mae: 16059.287 - mean_q: 21646.148 Interval 3254 (1626500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1818 Interval 3255 (1627000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6044 1 episodes - episode_reward: -456.510 [-456.510, -456.510] - loss: 379258.969 - mae: 16353.239 - mean_q: 22127.377 Interval 3256 (1627500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2859 Interval 3257 (1628000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1804 Interval 3258 (1628500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2050 Interval 3259 (1629000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.0203 Interval 3260 (1629500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7880 1 episodes - episode_reward: -706.121 [-706.121, -706.121] - 
loss: 523450.719 - mae: 18261.420 - mean_q: 24704.932 Interval 3261 (1630000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1847 Interval 3262 (1630500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3791 Interval 3263 (1631000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1845 Interval 3264 (1631500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3175 Interval 3265 (1632000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4228 1 episodes - episode_reward: -601.761 [-601.761, -601.761] - loss: 929280.375 - mae: 19193.891 - mean_q: 25796.211 Interval 3266 (1632500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.0698 Interval 3267 (1633000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.3607 Interval 3268 (1633500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.3850 Interval 3269 (1634000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1268 2 episodes - episode_reward: -527.812 [-614.077, -441.546] - loss: 297672.594 - mae: 18418.441 - mean_q: 24764.170 Interval 3270 (1634500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5450 1 episodes - episode_reward: -128.703 [-128.703, -128.703] - loss: 510543.375 - mae: 18072.557 - mean_q: 24273.281 Interval 3271 (1635000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0184 1 episodes - episode_reward: -816.313 [-816.313, -816.313] - loss: 319674.500 - mae: 17936.281 - mean_q: 24113.725 Interval 3272 (1635500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: 0.2232 Interval 3273 (1636000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1041 2 episodes - 
episode_reward: -518.595 [-686.577, -350.613] - loss: 520516.438 - mae: 17198.189 - mean_q: 23096.680 Interval 3274 (1636500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.3373 2 episodes - episode_reward: -150.170 [-163.777, -136.563] - loss: 245912.156 - mae: 16888.811 - mean_q: 22718.561 Interval 3275 (1637000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4991 Interval 3276 (1637500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: 0.0578 Interval 3277 (1638000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2627 Interval 3278 (1638500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0562 Interval 3279 (1639000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3814 Interval 3280 (1639500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2379 Interval 3281 (1640000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2879 Interval 3282 (1640500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.3739 Interval 3283 (1641000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.0324 Interval 3284 (1641500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.3905 Interval 3285 (1642000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2569 Interval 3286 (1642500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2137 Interval 3287 (1643000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2962 Interval 3288 (1643500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.0745 Interval 3289 (1644000 steps performed) 500/500 [==============================] - 
6s 13ms/step - reward: -0.5955 Interval 3290 (1644500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.2394 Interval 3291 (1645000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.6855 Interval 3292 (1645500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1567 Interval 3293 (1646000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.6610 Interval 3294 (1646500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.5129 Interval 3295 (1647000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: 0.1005 Interval 3296 (1647500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: 0.4234 Interval 3297 (1648000 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -1.6566 Interval 3298 (1648500 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -2.9400 Interval 3299 (1649000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.5581 6 episodes - episode_reward: -1074.399 [-5378.742, -142.150] - loss: 895154.875 - mae: 20430.135 - mean_q: 27522.107 Interval 3300 (1649500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.2586 8 episodes - episode_reward: -141.795 [-217.713, -37.788] - loss: 961203.062 - mae: 21113.750 - mean_q: 28440.367 Interval 3301 (1650000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -2.4008 6 episodes - episode_reward: -199.309 [-259.223, -132.754] - loss: 931151.688 - mae: 21755.455 - mean_q: 29308.174 Interval 3302 (1650500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0829 6 episodes - episode_reward: -90.686 [-178.940, 14.390] - loss: 1070076.625 - mae: 22506.758 - mean_q: 30311.180 Interval 3303 (1651000 steps 
performed) 500/500 [==============================] - 2s 5ms/step - reward: -0.5098 5 episodes - episode_reward: -51.876 [-118.347, 44.122] - loss: 1088220.750 - mae: 23481.002 - mean_q: 31623.717 Interval 3304 (1651500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2881 3 episodes - episode_reward: -54.176 [-93.012, -34.380] - loss: 1528757.625 - mae: 23853.096 - mean_q: 32129.355 Interval 3305 (1652000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8851 1 episodes - episode_reward: -397.950 [-397.950, -397.950] - loss: 1365763.000 - mae: 24731.096 - mean_q: 33308.672 Interval 3306 (1652500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.0216 1 episodes - episode_reward: -126.389 [-126.389, -126.389] - loss: 1287785.750 - mae: 25651.033 - mean_q: 34511.230 Interval 3307 (1653000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7400 1 episodes - episode_reward: -319.874 [-319.874, -319.874] - loss: 1352329.875 - mae: 26331.537 - mean_q: 35454.410 Interval 3308 (1653500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2545 Interval 3309 (1654000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2384 1 episodes - episode_reward: -280.757 [-280.757, -280.757] - loss: 1348914.750 - mae: 26689.350 - mean_q: 35964.500 Interval 3310 (1654500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5509 Interval 3311 (1655000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0169 3 episodes - episode_reward: -247.599 [-459.621, -120.522] - loss: 1516932.375 - mae: 27965.699 - mean_q: 37728.262 Interval 3312 (1655500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2381 Interval 3313 (1656000 steps performed) 500/500 [==============================] - 3s 
6ms/step - reward: -0.1413 Interval 3314 (1656500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1527 Interval 3315 (1657000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1915 Interval 3316 (1657500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2141 Interval 3317 (1658000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2335 Interval 3318 (1658500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1430 Interval 3319 (1659000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1723 Interval 3320 (1659500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2065 Interval 3321 (1660000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1681 Interval 3322 (1660500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.1775 Interval 3323 (1661000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1722 Interval 3324 (1661500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1208 Interval 3325 (1662000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1826 Interval 3326 (1662500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1614 Interval 3327 (1663000 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.2174 Interval 3328 (1663500 steps performed) 500/500 [==============================] - 7s 13ms/step - reward: -0.1695 Interval 3329 (1664000 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1164 Interval 3330 (1664500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.2131 Interval 3331 (1665000 steps performed) 
500/500 [==============================] - 7s 14ms/step - reward: -0.1733 Interval 3332 (1665500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1396 Interval 3333 (1666000 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.1673 Interval 3334 (1666500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1528 Interval 3335 (1667000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.2135 Interval 3336 (1667500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -1.2003 1 episodes - episode_reward: -2697.186 [-2697.186, -2697.186] - loss: 799754.062 - mae: 33239.184 - mean_q: 44748.746 Interval 3337 (1668000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1086 Interval 3338 (1668500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1458 Interval 3339 (1669000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1772 Interval 3340 (1669500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2345 Interval 3341 (1670000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1717 Interval 3342 (1670500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.1836 Interval 3343 (1671000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.1850 Interval 3344 (1671500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -2.6294 Interval 3345 (1672000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4141 1 episodes - episode_reward: -2568.651 [-2568.651, -2568.651] - loss: 729704.812 - mae: 34542.785 - mean_q: 46498.934 Interval 3346 (1672500 steps performed) 500/500 [==============================] - 3s 7ms/step - 
reward: -0.1988 Interval 3347 (1673000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1609 Interval 3348 (1673500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.6332 Interval 3349 (1674000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1647 1 episodes - episode_reward: -1619.328 [-1619.328, -1619.328] - loss: 596417.938 - mae: 37122.348 - mean_q: 50004.383 Interval 3350 (1674500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.1971 Interval 3351 (1675000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2022 Interval 3352 (1675500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.2176 Interval 3353 (1676000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1725 Interval 3354 (1676500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2157 Interval 3355 (1677000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.2016 4 episodes - episode_reward: -402.264 [-720.609, -100.000] - loss: 691311.500 - mae: 42214.438 - mean_q: 56933.660 Interval 3356 (1677500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0886 5 episodes - episode_reward: -208.065 [-566.708, -97.963] - loss: 768725.812 - mae: 42804.992 - mean_q: 57756.395 Interval 3357 (1678000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9070 1 episodes - episode_reward: -813.277 [-813.277, -813.277] - loss: 621263.375 - mae: 43989.246 - mean_q: 59354.777 Interval 3358 (1678500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9858 3 episodes - episode_reward: -217.697 [-248.241, -165.342] - loss: 840238.375 - mae: 45459.578 - mean_q: 61331.109 Interval 3359 (1679000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -0.7849 1 episodes - episode_reward: -311.467 [-311.467, -311.467] - loss: 830272.812 - mae: 46294.500 - mean_q: 62426.031 Interval 3360 (1679500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7272 5 episodes - episode_reward: -267.295 [-474.582, -128.789] - loss: 690853.062 - mae: 47233.555 - mean_q: 63663.477 Interval 3361 (1680000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6266 3 episodes - episode_reward: -312.657 [-592.007, -104.693] - loss: 1129412.125 - mae: 48628.371 - mean_q: 65504.613 Interval 3362 (1680500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6975 3 episodes - episode_reward: -389.307 [-399.795, -374.761] - loss: 858584.438 - mae: 49358.023 - mean_q: 66508.766 Interval 3363 (1681000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4754 3 episodes - episode_reward: -293.471 [-421.521, -173.120] - loss: 666011.500 - mae: 50115.730 - mean_q: 67534.062 Interval 3364 (1681500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.3103 4 episodes - episode_reward: -651.264 [-942.687, -381.702] - loss: 689182.000 - mae: 50784.730 - mean_q: 68417.695 Interval 3365 (1682000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2937 3 episodes - episode_reward: -397.492 [-758.370, -144.886] - loss: 925707.750 - mae: 51310.043 - mean_q: 69125.250 Interval 3366 (1682500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7605 3 episodes - episode_reward: -159.831 [-231.335, -108.090] - loss: 920287.125 - mae: 52644.012 - mean_q: 70931.883 Interval 3367 (1683000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.2068 3 episodes - episode_reward: -803.659 [-844.961, -748.598] - loss: 795130.188 - mae: 53339.836 - mean_q: 
71837.562 Interval 3368 (1683500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3207 2 episodes - episode_reward: -306.201 [-386.035, -226.366] - loss: 4426102.500 - mae: 53608.199 - mean_q: 72011.805 Interval 3369 (1684000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6350 4 episodes - episode_reward: -364.680 [-541.334, -202.410] - loss: 596383.812 - mae: 53810.965 - mean_q: 72404.562 Interval 3370 (1684500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2143 4 episodes - episode_reward: -438.255 [-604.122, -119.116] - loss: 713685.750 - mae: 54302.188 - mean_q: 73057.805 Interval 3371 (1685000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.6486 2 episodes - episode_reward: -298.649 [-498.847, -98.450] - loss: 2802328.250 - mae: 53677.391 - mean_q: 72115.180 Interval 3372 (1685500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.5067 3 episodes - episode_reward: -429.376 [-616.923, -315.072] - loss: 2521822.750 - mae: 54737.910 - mean_q: 73531.148 Interval 3373 (1686000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9550 2 episodes - episode_reward: -494.934 [-834.026, -155.843] - loss: 671327.188 - mae: 54444.586 - mean_q: 73202.641 Interval 3374 (1686500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4438 1 episodes - episode_reward: -214.774 [-214.774, -214.774] - loss: 3987863.250 - mae: 53351.199 - mean_q: 71599.453 Interval 3375 (1687000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0645 4 episodes - episode_reward: -262.288 [-306.614, -219.518] - loss: 958543.375 - mae: 53950.254 - mean_q: 72471.758 Interval 3376 (1687500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6897 2 episodes - episode_reward: -621.248 [-1036.101, 
-206.394] - loss: 825483.312 - mae: 52382.723 - mean_q: 70370.172 Interval 3377 (1688000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3156 5 episodes - episode_reward: -216.222 [-282.102, -125.258] - loss: 2073583.250 - mae: 52284.422 - mean_q: 70205.492 Interval 3378 (1688500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9946 2 episodes - episode_reward: -459.773 [-553.865, -365.680] - loss: 742634.125 - mae: 52466.219 - mean_q: 70470.742 Interval 3379 (1689000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.5484 4 episodes - episode_reward: -644.086 [-1254.142, -270.999] - loss: 689450.125 - mae: 51728.012 - mean_q: 69494.555 Interval 3380 (1689500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0345 2 episodes - episode_reward: -360.369 [-397.160, -323.578] - loss: 734582.625 - mae: 49784.703 - mean_q: 66920.555 Interval 3381 (1690000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0758 3 episodes - episode_reward: -723.793 [-1531.042, -232.266] - loss: 2636989.000 - mae: 49486.352 - mean_q: 66470.016 Interval 3382 (1690500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4538 3 episodes - episode_reward: -397.179 [-450.295, -316.380] - loss: 663690.312 - mae: 47322.973 - mean_q: 63598.203 Interval 3383 (1691000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7077 4 episodes - episode_reward: -375.527 [-439.055, -258.525] - loss: 541880.000 - mae: 47049.672 - mean_q: 63258.730 Interval 3384 (1691500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4785 3 episodes - episode_reward: -391.442 [-455.481, -293.481] - loss: 691922.625 - mae: 45431.305 - mean_q: 61067.852 Interval 3385 (1692000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: 
-3.6987 3 episodes - episode_reward: -639.490 [-1114.883, -358.304] - loss: 637491.625 - mae: 44013.508 - mean_q: 59135.355 Interval 3386 (1692500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1120 5 episodes - episode_reward: -298.395 [-496.595, -141.721] - loss: 568761.750 - mae: 42409.926 - mean_q: 56974.969 Interval 3387 (1693000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9411 2 episodes - episode_reward: -322.189 [-345.152, -299.226] - loss: 604624.062 - mae: 40216.852 - mean_q: 54039.871 Interval 3388 (1693500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5917 2 episodes - episode_reward: -343.327 [-346.926, -339.728] - loss: 706029.000 - mae: 39524.297 - mean_q: 53095.988 Interval 3389 (1694000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4718 3 episodes - episode_reward: -245.408 [-318.758, -103.875] - loss: 3211766.500 - mae: 38526.449 - mean_q: 51715.715 Interval 3390 (1694500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.8255 2 episodes - episode_reward: -168.681 [-237.361, -100.000] - loss: 565915.750 - mae: 35542.516 - mean_q: 47768.062 Interval 3391 (1695000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.4599 8 episodes - episode_reward: -287.877 [-824.335, -97.798] - loss: 564134.438 - mae: 33958.555 - mean_q: 45637.410 Interval 3392 (1695500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -5.4056 7 episodes - episode_reward: -391.296 [-856.815, -111.862] - loss: 690941.125 - mae: 31360.463 - mean_q: 42136.668 Interval 3393 (1696000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3050 5 episodes - episode_reward: -230.339 [-467.374, -146.614] - loss: 631374.625 - mae: 29219.057 - mean_q: 39250.609 Interval 3394 (1696500 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -4.4657 5 episodes - episode_reward: -447.508 [-776.608, -86.190] - loss: 599435.812 - mae: 27551.826 - mean_q: 37025.801 Interval 3395 (1697000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1058 4 episodes - episode_reward: -386.464 [-594.007, -153.257] - loss: 3282721.250 - mae: 25109.299 - mean_q: 33732.859 Interval 3396 (1697500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6448 3 episodes - episode_reward: -440.633 [-473.234, -420.308] - loss: 2456315.750 - mae: 23005.996 - mean_q: 30918.340 Interval 3397 (1698000 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.4698 4 episodes - episode_reward: -527.573 [-824.999, -144.094] - loss: 490101.219 - mae: 21017.189 - mean_q: 28282.006 Interval 3398 (1698500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3202 3 episodes - episode_reward: -405.292 [-459.454, -296.986] - loss: 659914.625 - mae: 19878.715 - mean_q: 26754.262 Interval 3399 (1699000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0595 5 episodes - episode_reward: -257.080 [-429.594, -139.178] - loss: 943385.125 - mae: 19661.648 - mean_q: 26453.879 Interval 3400 (1699500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0089 3 episodes - episode_reward: -694.570 [-872.849, -523.296] - loss: 935086.438 - mae: 19675.066 - mean_q: 26455.191 Interval 3401 (1700000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0370 2 episodes - episode_reward: -606.521 [-724.240, -488.802] - loss: 764939.250 - mae: 19361.635 - mean_q: 26025.469 Interval 3402 (1700500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3997 2 episodes - episode_reward: -648.283 [-773.255, -523.311] - loss: 814488.438 - mae: 19473.715 - mean_q: 
26157.178 Interval 3403 (1701000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0928 3 episodes - episode_reward: -566.248 [-698.156, -483.728] - loss: 837732.688 - mae: 19100.348 - mean_q: 25666.820 Interval 3404 (1701500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.0412 3 episodes - episode_reward: -475.807 [-561.340, -390.798] - loss: 561285.500 - mae: 19157.070 - mean_q: 25789.457 Interval 3405 (1702000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.1871 4 episodes - episode_reward: -616.089 [-823.220, -510.429] - loss: 711660.062 - mae: 19824.008 - mean_q: 26710.914 Interval 3406 (1702500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3461 3 episodes - episode_reward: -405.364 [-534.896, -171.384] - loss: 630072.000 - mae: 20064.660 - mean_q: 27056.248 Interval 3407 (1703000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.3070 5 episodes - episode_reward: -407.310 [-628.563, -100.000] - loss: 683091.625 - mae: 20799.588 - mean_q: 28083.219 Interval 3408 (1703500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -4.0236 5 episodes - episode_reward: -343.511 [-622.816, -144.092] - loss: 736961.688 - mae: 21602.684 - mean_q: 29157.473 Interval 3409 (1704000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8883 3 episodes - episode_reward: -444.397 [-605.485, -201.988] - loss: 1326455.875 - mae: 22516.744 - mean_q: 30313.006 Interval 3410 (1704500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.0542 4 episodes - episode_reward: -542.655 [-658.278, -314.850] - loss: 1063663.500 - mae: 23152.207 - mean_q: 31187.979 Interval 3411 (1705000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9770 3 episodes - episode_reward: -462.671 [-570.708, 
-377.030] - loss: 1488427.250 - mae: 24336.205 - mean_q: 32718.473 Interval 3412 (1705500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.1033 Interval 3413 (1706000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1058 Interval 3414 (1706500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1099 Interval 3415 (1707000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4653 2 episodes - episode_reward: -427.995 [-749.854, -106.137] - loss: 1394276.500 - mae: 25339.170 - mean_q: 33929.164 Interval 3416 (1707500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.2620 1 episodes - episode_reward: -157.076 [-157.076, -157.076] - loss: 1048887.375 - mae: 25432.562 - mean_q: 34068.281 Interval 3417 (1708000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1502 3 episodes - episode_reward: -188.516 [-265.089, -93.480] - loss: 1510026.125 - mae: 25239.725 - mean_q: 33813.688 Interval 3418 (1708500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.5738 2 episodes - episode_reward: -126.715 [-177.132, -76.299] - loss: 1353454.375 - mae: 25064.666 - mean_q: 33573.441 Interval 3419 (1709000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9984 3 episodes - episode_reward: -323.813 [-458.607, -246.186] - loss: 940946.188 - mae: 24443.926 - mean_q: 32750.869 Interval 3420 (1709500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.1128 5 episodes - episode_reward: -204.825 [-278.865, -127.108] - loss: 1050414.625 - mae: 24087.564 - mean_q: 32299.154 Interval 3421 (1710000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3851 4 episodes - episode_reward: -352.786 [-542.856, -88.975] - loss: 1269325.625 - mae: 23577.914 - mean_q: 
31646.064 Interval 3422 (1710500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8497 3 episodes - episode_reward: -369.027 [-456.169, -212.990] - loss: 1212483.500 - mae: 22690.559 - mean_q: 30492.662 Interval 3423 (1711000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8591 5 episodes - episode_reward: -327.136 [-542.171, -174.810] - loss: 1215291.000 - mae: 22318.330 - mean_q: 30028.453 Interval 3424 (1711500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.8760 4 episodes - episode_reward: -290.615 [-451.863, -218.341] - loss: 1133823.500 - mae: 21658.021 - mean_q: 29196.822 Interval 3425 (1712000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2611 7 episodes - episode_reward: -264.733 [-515.452, -143.286] - loss: 1466662.500 - mae: 21487.420 - mean_q: 29008.719 Interval 3426 (1712500 steps performed) 500/500 [==============================] - 2s 5ms/step - reward: -3.8644 4 episodes - episode_reward: -483.203 [-846.549, -197.065] - loss: 1017253.375 - mae: 20727.646 - mean_q: 28028.781 Interval 3427 (1713000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0739 2 episodes - episode_reward: -450.314 [-759.155, -141.473] - loss: 968294.188 - mae: 20625.434 - mean_q: 27911.428 Interval 3428 (1713500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0459 2 episodes - episode_reward: -449.793 [-468.878, -430.708] - loss: 1575779.125 - mae: 20173.975 - mean_q: 27291.516 Interval 3429 (1714000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -4.8454 5 episodes - episode_reward: -484.743 [-785.569, -100.000] - loss: 1389897.000 - mae: 20016.738 - mean_q: 27101.994 Interval 3430 (1714500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.1873 3 episodes - episode_reward: -624.725 
[-786.416, -371.113] - loss: 1075072.625 - mae: 19789.328 - mean_q: 26828.834 Interval 3431 (1715000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.9676 2 episodes - episode_reward: -648.304 [-856.891, -439.717] - loss: 754850.438 - mae: 19641.492 - mean_q: 26660.543 Interval 3432 (1715500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.8045 3 episodes - episode_reward: -722.619 [-812.068, -559.668] - loss: 854341.062 - mae: 19401.195 - mean_q: 26326.406 Interval 3433 (1716000 steps performed) 500/500 [==============================] - 289s 579ms/step - reward: -2.1244 2 episodes - episode_reward: -483.193 [-801.153, -165.233] - loss: 951111.375 - mae: 19222.320 - mean_q: 26093.572 Interval 3434 (1716500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3874 3 episodes - episode_reward: -518.467 [-705.093, -200.088] - loss: 1393035.000 - mae: 19511.076 - mean_q: 26481.518 Interval 3435 (1717000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0176 3 episodes - episode_reward: -261.898 [-382.943, -103.067] - loss: 861370.875 - mae: 19098.430 - mean_q: 25932.809 Interval 3436 (1717500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.2733 4 episodes - episode_reward: -474.018 [-858.090, -110.399] - loss: 1070640.250 - mae: 19324.559 - mean_q: 26258.641 Interval 3437 (1718000 steps performed) 500/500 [==============================] - 11s 22ms/step - reward: -2.4468 4 episodes - episode_reward: -336.344 [-675.013, -65.048] - loss: 1039986.188 - mae: 18790.715 - mean_q: 25515.002 Interval 3438 (1718500 steps performed) 500/500 [==============================] - 14s 28ms/step - reward: -1.9604 3 episodes - episode_reward: -229.143 [-455.764, -89.084] - loss: 1208577.000 - mae: 18709.355 - mean_q: 25363.256 Interval 3439 (1719000 steps performed) 500/500 [==============================] - 14s 
29ms/step - reward: -2.5788 3 episodes - episode_reward: -494.656 [-992.344, -115.469] - loss: 1185923.500 - mae: 18718.455 - mean_q: 25393.371 Interval 3440 (1719500 steps performed) 500/500 [==============================] - 1319s 3s/step - reward: -3.5104 1 episodes - episode_reward: -1324.372 [-1324.372, -1324.372] - loss: 1158471.375 - mae: 18867.135 - mean_q: 25599.449 Interval 3441 (1720000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.3297 1 episodes - episode_reward: -2216.811 [-2216.811, -2216.811] - loss: 1087319.125 - mae: 19234.035 - mean_q: 26119.346 Interval 3442 (1720500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.7088 2 episodes - episode_reward: -148.256 [-194.088, -102.423] - loss: 1844414.500 - mae: 19657.969 - mean_q: 26716.537 Interval 3443 (1721000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.1701 Interval 3444 (1721500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.0927 3 episodes - episode_reward: -356.807 [-438.293, -262.006] - loss: 1678601.375 - mae: 20789.578 - mean_q: 28451.904 Interval 3445 (1722000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4030 3 episodes - episode_reward: -253.011 [-406.217, -133.223] - loss: 1556052.125 - mae: 21824.074 - mean_q: 29927.945 Interval 3446 (1722500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2192 Interval 3447 (1723000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1155 Interval 3448 (1723500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2587 Interval 3449 (1724000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2010 Interval 3450 (1724500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.2836 Interval 3451 (1725000 
steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3604 Interval 3452 (1725500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.7921 1 episodes - episode_reward: -1075.676 [-1075.676, -1075.676] - loss: 7477096.000 - mae: 34673.148 - mean_q: 47560.898 Interval 3453 (1726000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6826 Interval 3454 (1726500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4593 1 episodes - episode_reward: -855.121 [-855.121, -855.121] - loss: 8032721.500 - mae: 39482.738 - mean_q: 54108.441 Interval 3455 (1727000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3029 Interval 3456 (1727500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3847 3 episodes - episode_reward: -484.888 [-1295.889, -16.197] - loss: 8386719.500 - mae: 43810.879 - mean_q: 59954.574 Interval 3457 (1728000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8853 2 episodes - episode_reward: -590.747 [-851.897, -329.598] - loss: 8103155.000 - mae: 46040.645 - mean_q: 63056.020 Interval 3458 (1728500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1588 1 episodes - episode_reward: -225.776 [-225.776, -225.776] - loss: 8905041.000 - mae: 48394.551 - mean_q: 66218.062 Interval 3459 (1729000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4630 2 episodes - episode_reward: -425.940 [-530.352, -321.527] - loss: 7641001.500 - mae: 50862.531 - mean_q: 69614.445 Interval 3460 (1729500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -3.3687 3 episodes - episode_reward: -704.832 [-1026.466, -515.069] - loss: 8962366.000 - mae: 53221.438 - mean_q: 72861.727 Interval 3461 (1730000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -1.9988 3 episodes - episode_reward: -321.143 [-647.902, -112.514] - loss: 7306771.500 - mae: 56599.027 - mean_q: 77664.961 Interval 3462 (1730500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3764 2 episodes - episode_reward: -595.807 [-721.980, -469.635] - loss: 5518174.000 - mae: 60490.914 - mean_q: 83043.031 Interval 3463 (1731000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.6627 5 episodes - episode_reward: -304.370 [-366.172, -177.356] - loss: 8323326.500 - mae: 64891.996 - mean_q: 88986.844 Interval 3464 (1731500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8963 3 episodes - episode_reward: -345.734 [-619.139, -207.935] - loss: 5107528.000 - mae: 68298.938 - mean_q: 93689.312 Interval 3465 (1732000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.4306 Interval 3466 (1732500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2427 Interval 3467 (1733000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1776 Interval 3468 (1733500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3414 Interval 3469 (1734000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2579 Interval 3470 (1734500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.3221 Interval 3471 (1735000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1404 Interval 3472 (1735500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1226 Interval 3473 (1736000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1108 Interval 3474 (1736500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: 
-0.1507 Interval 3475 (1737000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1755 Interval 3476 (1737500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.5854 Interval 3477 (1738000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.4778 Interval 3478 (1738500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.5398 Interval 3479 (1739000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -1.5387 1 episodes - episode_reward: -2771.878 [-2771.878, -2771.878] - loss: 4421547.000 - mae: 81674.602 - mean_q: 110821.609 Interval 3480 (1739500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7405 Interval 3481 (1740000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7415 Interval 3482 (1740500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5202 2 episodes - episode_reward: -883.536 [-1657.835, -109.238] - loss: 7047077.500 - mae: 83576.039 - mean_q: 113644.141 Interval 3483 (1741000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8195 Interval 3484 (1741500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6138 2 episodes - episode_reward: -877.073 [-1654.146, -100.000] - loss: 5187496.500 - mae: 84326.023 - mean_q: 114495.992 Interval 3485 (1742000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8621 Interval 3486 (1742500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.9313 4 episodes - episode_reward: -633.749 [-1710.672, -100.000] - loss: 5943935.500 - mae: 83762.781 - mean_q: 113622.047 Interval 3487 (1743000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0991 Interval 3488 (1743500 steps performed) 
500/500 [==============================] - 3s 7ms/step - reward: -3.6153 Interval 3489 (1744000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.6030 1 episodes - episode_reward: -2875.537 [-2875.537, -2875.537] - loss: 6177139.000 - mae: 80973.055 - mean_q: 109644.570 Interval 3490 (1744500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3164 1 episodes - episode_reward: -1514.542 [-1514.542, -1514.542] - loss: 4914431.000 - mae: 80138.055 - mean_q: 108324.766 Interval 3491 (1745000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7116 1 episodes - episode_reward: -195.675 [-195.675, -195.675] - loss: 5210777.500 - mae: 79727.633 - mean_q: 107688.188 Interval 3492 (1745500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.2054 Interval 3493 (1746000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.5430 1 episodes - episode_reward: -403.384 [-403.384, -403.384] - loss: 6212797.000 - mae: 75784.188 - mean_q: 102181.859 Interval 3494 (1746500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.9617 3 episodes - episode_reward: -186.449 [-249.790, -86.080] - loss: 5251820.000 - mae: 75586.641 - mean_q: 101907.594 Interval 3495 (1747000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1420 2 episodes - episode_reward: -252.538 [-262.187, -242.888] - loss: 5743075.500 - mae: 73718.938 - mean_q: 99394.719 Interval 3496 (1747500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4132 4 episodes - episode_reward: -197.464 [-334.962, -93.700] - loss: 3777559.750 - mae: 72914.992 - mean_q: 98359.289 Interval 3497 (1748000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3241 2 episodes - episode_reward: -241.672 [-330.690, -152.654] - loss: 5364107.000 - mae: 
71008.102 - mean_q: 95712.930 Interval 3498 (1748500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7733 1 episodes - episode_reward: -342.099 [-342.099, -342.099] - loss: 3733883.750 - mae: 70499.414 - mean_q: 95111.922 Interval 3499 (1749000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3791 3 episodes - episode_reward: -246.823 [-358.558, -100.000] - loss: 4140596.750 - mae: 69574.773 - mean_q: 93872.031 Interval 3500 (1749500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3981 3 episodes - episode_reward: -308.325 [-444.393, -166.650] - loss: 3664213.250 - mae: 68692.242 - mean_q: 92651.250 Interval 3501 (1750000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4413 1 episodes - episode_reward: -380.571 [-380.571, -380.571] - loss: 3983487.000 - mae: 67676.984 - mean_q: 91245.961 Interval 3502 (1750500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9726 4 episodes - episode_reward: -262.761 [-454.354, -180.897] - loss: 3667508.750 - mae: 67024.883 - mean_q: 90392.680 Interval 3503 (1751000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7854 4 episodes - episode_reward: -251.018 [-437.494, -169.848] - loss: 6394766.500 - mae: 66176.633 - mean_q: 89174.453 Interval 3504 (1751500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3782 3 episodes - episode_reward: -277.738 [-371.814, -220.901] - loss: 4085710.250 - mae: 65289.852 - mean_q: 88041.531 Interval 3505 (1752000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3361 2 episodes - episode_reward: -308.445 [-455.074, -161.815] - loss: 7696247.500 - mae: 64588.906 - mean_q: 87020.430 Interval 3506 (1752500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7844 2 episodes - 
episode_reward: -407.377 [-602.889, -211.866] - loss: 7698024.000 - mae: 64480.629 - mean_q: 86935.141 Interval 3507 (1753000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4421 2 episodes - episode_reward: -430.122 [-458.001, -402.243] - loss: 7821392.000 - mae: 64755.812 - mean_q: 87252.359 Interval 3508 (1753500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4687 1 episodes - episode_reward: -544.349 [-544.349, -544.349] - loss: 2969738.750 - mae: 64974.914 - mean_q: 87684.969 Interval 3509 (1754000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.4595 1 episodes - episode_reward: -617.100 [-617.100, -617.100] - loss: 3777721.250 - mae: 64270.547 - mean_q: 86748.625 Interval 3510 (1754500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.7853 4 episodes - episode_reward: -284.149 [-662.497, -100.079] - loss: 6488023.500 - mae: 64997.121 - mean_q: 87626.602 Interval 3511 (1755000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5464 2 episodes - episode_reward: -666.445 [-868.900, -463.990] - loss: 4042048.500 - mae: 64629.820 - mean_q: 87151.711 Interval 3512 (1755500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2511 1 episodes - episode_reward: -334.902 [-334.902, -334.902] - loss: 12575011.000 - mae: 64022.254 - mean_q: 86231.133 Interval 3513 (1756000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1883 2 episodes - episode_reward: -215.180 [-310.163, -120.196] - loss: 14537734.000 - mae: 64479.832 - mean_q: 86846.203 Interval 3514 (1756500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.1062 1 episodes - episode_reward: -564.832 [-564.832, -564.832] - loss: 3429548.250 - mae: 63606.191 - mean_q: 85829.633 Interval 3515 (1757000 steps performed) 500/500 
[==============================] - 3s 5ms/step - reward: -1.9270 5 episodes - episode_reward: -274.889 [-514.631, -188.581] - loss: 10477609.000 - mae: 63744.785 - mean_q: 85920.578 Interval 3516 (1757500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9550 1 episodes - episode_reward: -883.371 [-883.371, -883.371] - loss: 4645551.000 - mae: 63277.500 - mean_q: 85431.250 Interval 3517 (1758000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.8487 2 episodes - episode_reward: -473.477 [-747.458, -199.496] - loss: 4069641.750 - mae: 63032.672 - mean_q: 85138.055 Interval 3518 (1758500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0707 1 episodes - episode_reward: -642.406 [-642.406, -642.406] - loss: 4680252.000 - mae: 63511.445 - mean_q: 85824.391 Interval 3519 (1759000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.8760 1 episodes - episode_reward: -652.660 [-652.660, -652.660] - loss: 3204333.000 - mae: 64337.520 - mean_q: 87022.570 Interval 3520 (1759500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9674 3 episodes - episode_reward: -506.933 [-941.186, -281.357] - loss: 5443035.500 - mae: 64335.508 - mean_q: 87055.422 Interval 3521 (1760000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7788 1 episodes - episode_reward: -875.517 [-875.517, -875.517] - loss: 6068795.000 - mae: 65028.227 - mean_q: 88020.523 Interval 3522 (1760500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.5559 1 episodes - episode_reward: -1546.501 [-1546.501, -1546.501] - loss: 4614694.000 - mae: 64948.922 - mean_q: 88069.344 Interval 3523 (1761000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.9230 Interval 3524 (1761500 steps performed) 500/500 [==============================] - 4s 8ms/step - 
reward: -1.4817 1 episodes - episode_reward: -1007.222 [-1007.222, -1007.222] - loss: 7744772.000 - mae: 67734.078 - mean_q: 91924.844 Interval 3525 (1762000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.8822 2 episodes - episode_reward: -1311.816 [-1351.721, -1271.911] - loss: 6432263.000 - mae: 68629.703 - mean_q: 93190.617 Interval 3526 (1762500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.6537 1 episodes - episode_reward: -916.710 [-916.710, -916.710] - loss: 6981263.000 - mae: 69206.391 - mean_q: 93934.195 Interval 3527 (1763000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.1439 2 episodes - episode_reward: -876.157 [-930.258, -822.056] - loss: 6364318.000 - mae: 69928.023 - mean_q: 94878.016 Interval 3528 (1763500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.8647 1 episodes - episode_reward: -1108.470 [-1108.470, -1108.470] - loss: 6624990.000 - mae: 70403.570 - mean_q: 95473.375 Interval 3529 (1764000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.1814 2 episodes - episode_reward: -836.444 [-953.886, -719.002] - loss: 8983371.000 - mae: 70967.961 - mean_q: 96228.961 Interval 3530 (1764500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1766 2 episodes - episode_reward: -468.192 [-858.335, -78.049] - loss: 7314371.000 - mae: 70556.359 - mean_q: 95610.367 Interval 3531 (1765000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6657 2 episodes - episode_reward: -727.252 [-735.864, -718.641] - loss: 21676438.000 - mae: 70661.180 - mean_q: 95555.945 Interval 3532 (1765500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6121 2 episodes - episode_reward: -508.936 [-733.079, -284.793] - loss: 11339629.000 - mae: 70381.531 - mean_q: 95295.461 Interval 3533 (1766000 
steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6797 2 episodes - episode_reward: -786.320 [-811.198, -761.443] - loss: 11248063.000 - mae: 70020.461 - mean_q: 94761.211 Interval 3534 (1766500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6397 2 episodes - episode_reward: -531.672 [-564.974, -498.369] - loss: 10819480.000 - mae: 69987.242 - mean_q: 94640.438 Interval 3535 (1767000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8139 2 episodes - episode_reward: -642.617 [-659.353, -625.882] - loss: 4903015.500 - mae: 69024.414 - mean_q: 93372.031 Interval 3536 (1767500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.6651 3 episodes - episode_reward: -447.679 [-527.726, -361.291] - loss: 10161329.000 - mae: 68610.328 - mean_q: 92657.992 Interval 3537 (1768000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8827 3 episodes - episode_reward: -545.540 [-581.056, -496.093] - loss: 9169574.000 - mae: 67038.539 - mean_q: 90577.547 Interval 3538 (1768500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.9495 4 episodes - episode_reward: -398.185 [-618.095, -282.520] - loss: 4689609.000 - mae: 66145.859 - mean_q: 89474.797 Interval 3539 (1769000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4713 3 episodes - episode_reward: -390.841 [-600.488, -100.000] - loss: 7792771.500 - mae: 65134.863 - mean_q: 88058.414 Interval 3540 (1769500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9088 4 episodes - episode_reward: -374.595 [-477.054, -145.139] - loss: 2979115.000 - mae: 64659.168 - mean_q: 87425.914 Interval 3541 (1770000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.7659 1 episodes - episode_reward: -600.048 [-600.048, -600.048] - loss: 
4148225.000 - mae: 63105.180 - mean_q: 85310.094 Interval 3542 (1770500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4889 1 episodes - episode_reward: -623.023 [-623.023, -623.023] - loss: 3312359.250 - mae: 61916.656 - mean_q: 83741.891 Interval 3543 (1771000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8090 3 episodes - episode_reward: -568.036 [-715.051, -493.729] - loss: 3884047.000 - mae: 60663.090 - mean_q: 81998.219 Interval 3544 (1771500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.7857 2 episodes - episode_reward: -620.041 [-675.791, -564.290] - loss: 3515220.750 - mae: 59638.559 - mean_q: 80616.492 Interval 3545 (1772000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1724 1 episodes - episode_reward: -1469.519 [-1469.519, -1469.519] - loss: 3858695.250 - mae: 59527.672 - mean_q: 80507.391 Interval 3546 (1772500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9454 2 episodes - episode_reward: -377.874 [-458.705, -297.043] - loss: 3332498.750 - mae: 59510.770 - mean_q: 80509.375 Interval 3547 (1773000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.3015 2 episodes - episode_reward: -502.845 [-575.787, -429.902] - loss: 4353160.000 - mae: 58349.145 - mean_q: 78962.430 Interval 3548 (1773500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.4093 5 episodes - episode_reward: -318.621 [-485.363, -97.899] - loss: 3926345.750 - mae: 58035.699 - mean_q: 78542.031 Interval 3549 (1774000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3458 2 episodes - episode_reward: -240.861 [-359.851, -121.870] - loss: 3384547.500 - mae: 58018.859 - mean_q: 78512.062 Interval 3550 (1774500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.5105 3 
episodes - episode_reward: -310.812 [-445.400, -120.191] - loss: 4546399.000 - mae: 57266.270 - mean_q: 77445.586 Interval 3551 (1775000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.4821 5 episodes - episode_reward: -216.683 [-466.149, -108.114] - loss: 3191358.250 - mae: 57059.559 - mean_q: 77137.344 Interval 3552 (1775500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.4073 3 episodes - episode_reward: -159.714 [-251.044, -97.335] - loss: 3520111.750 - mae: 56492.160 - mean_q: 76373.969 Interval 3553 (1776000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -0.4158 Interval 3554 (1776500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3594 3 episodes - episode_reward: -409.061 [-678.396, -195.700] - loss: 3862018.500 - mae: 56727.797 - mean_q: 76645.641 Interval 3555 (1777000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.1054 Interval 3556 (1777500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9963 Interval 3557 (1778000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0213 6 episodes - episode_reward: -270.635 [-863.641, -85.665] - loss: 4287069.500 - mae: 57227.625 - mean_q: 77360.375 Interval 3558 (1778500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.5305 4 episodes - episode_reward: -184.365 [-264.528, -139.578] - loss: 3941514.500 - mae: 58225.359 - mean_q: 78711.445 Interval 3559 (1779000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7726 6 episodes - episode_reward: -138.081 [-270.804, -90.138] - loss: 4309740.000 - mae: 59063.246 - mean_q: 79749.906 Interval 3560 (1779500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7994 4 episodes - episode_reward: -230.255 [-292.993, -152.233] - 
loss: 4462419.500 - mae: 58980.914 - mean_q: 79552.273 Interval 3561 (1780000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.2937 3 episodes - episode_reward: -235.945 [-271.728, -180.163] - loss: 5296240.000 - mae: 59452.391 - mean_q: 80090.992 Interval 3562 (1780500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.0861 6 episodes - episode_reward: -166.048 [-220.982, -126.948] - loss: 4461105.500 - mae: 59512.293 - mean_q: 80105.375 Interval 3563 (1781000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.7439 2 episodes - episode_reward: -102.628 [-123.794, -81.461] - loss: 6610570.000 - mae: 60114.723 - mean_q: 80827.672 Interval 3564 (1781500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.7672 5 episodes - episode_reward: -206.892 [-305.760, -100.750] - loss: 8265788.500 - mae: 60169.969 - mean_q: 80799.750 Interval 3565 (1782000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.9017 Interval 3566 (1782500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0686 3 episodes - episode_reward: -315.795 [-516.997, -98.019] - loss: 5889579.500 - mae: 59089.621 - mean_q: 79305.859 Interval 3567 (1783000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -2.2642 6 episodes - episode_reward: -204.148 [-364.303, -100.000] - loss: 8739039.000 - mae: 59019.965 - mean_q: 79257.734 Interval 3568 (1783500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3902 3 episodes - episode_reward: -215.901 [-283.446, -138.976] - loss: 8165290.000 - mae: 58783.730 - mean_q: 79015.609 Interval 3569 (1784000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.0327 3 episodes - episode_reward: -173.851 [-196.738, -162.157] - loss: 7654134.500 - mae: 58523.887 - mean_q: 78860.195 
Interval 3570 (1784500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3664 2 episodes - episode_reward: -207.069 [-331.599, -82.540] - loss: 7665775.500 - mae: 58810.488 - mean_q: 79438.633 Interval 3571 (1785000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.9515 6 episodes - episode_reward: -209.791 [-465.422, -126.109] - loss: 11575330.000 - mae: 59555.348 - mean_q: 80567.156 Interval 3572 (1785500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4196 Interval 3573 (1786000 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -1.3694 4 episodes - episode_reward: -227.055 [-346.655, -113.477] - loss: 9359309.000 - mae: 61089.445 - mean_q: 82560.633 Interval 3574 (1786500 steps performed) 500/500 [==============================] - 3s 5ms/step - reward: -0.4829 2 episodes - episode_reward: -118.133 [-150.597, -85.669] - loss: 9233247.000 - mae: 60878.812 - mean_q: 82228.633 Interval 3575 (1787000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.8327 Interval 3576 (1787500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4904 Interval 3577 (1788000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0483 1 episodes - episode_reward: -1588.196 [-1588.196, -1588.196] - loss: 6351598.500 - mae: 58643.020 - mean_q: 78938.906 Interval 3578 (1788500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3602 Interval 3579 (1789000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.9156 2 episodes - episode_reward: -778.327 [-1325.258, -231.397] - loss: 4947735.000 - mae: 57030.336 - mean_q: 76845.727 Interval 3580 (1789500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.0390 Interval 3581 (1790000 steps performed) 500/500 
[==============================] - 3s 6ms/step - reward: -2.4169 4 episodes - episode_reward: -444.231 [-1120.455, -185.022] - loss: 3217082.250 - mae: 55070.195 - mean_q: 74397.617 Interval 3582 (1790500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.8221 4 episodes - episode_reward: -366.063 [-479.057, -203.747] - loss: 3441849.000 - mae: 54464.379 - mean_q: 73607.438 Interval 3583 (1791000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9648 3 episodes - episode_reward: -226.567 [-367.903, -151.836] - loss: 4181766.750 - mae: 54883.449 - mean_q: 74240.695 Interval 3584 (1791500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.5838 Interval 3585 (1792000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.7678 3 episodes - episode_reward: -827.091 [-2108.884, -149.701] - loss: 4758124.500 - mae: 54129.836 - mean_q: 73231.562 Interval 3586 (1792500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.3346 4 episodes - episode_reward: -246.944 [-391.476, -100.000] - loss: 3876203.500 - mae: 54223.629 - mean_q: 73491.219 Interval 3587 (1793000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6588 Interval 3588 (1793500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9448 1 episodes - episode_reward: -1818.087 [-1818.087, -1818.087] - loss: 3703378.000 - mae: 54373.809 - mean_q: 73705.469 Interval 3589 (1794000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.5888 3 episodes - episode_reward: -349.965 [-477.680, -248.751] - loss: 3258784.750 - mae: 54505.691 - mean_q: 73958.469 Interval 3590 (1794500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.4906 1 episodes - episode_reward: -306.130 [-306.130, -306.130] - loss: 3616934.500 - mae: 54704.797 - 
mean_q: 74144.375 Interval 3591 (1795000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.0039 Interval 3592 (1795500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.9722 2 episodes - episode_reward: -2196.683 [-4088.917, -304.448] - loss: 3520906.000 - mae: 54963.051 - mean_q: 74432.992 Interval 3593 (1796000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.7185 Interval 3594 (1796500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -5.1285 1 episodes - episode_reward: -3112.738 [-3112.738, -3112.738] - loss: 4344091.500 - mae: 54184.855 - mean_q: 73235.430 Interval 3595 (1797000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.6453 Interval 3596 (1797500 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.5527 3 episodes - episode_reward: -1109.859 [-3017.670, -102.213] - loss: 4154161.750 - mae: 52607.578 - mean_q: 70953.828 Interval 3597 (1798000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -2.1122 1 episodes - episode_reward: -1538.116 [-1538.116, -1538.116] - loss: 2925036.500 - mae: 51235.266 - mean_q: 69114.125 Interval 3598 (1798500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7837 Interval 3599 (1799000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.6636 1 episodes - episode_reward: -1262.301 [-1262.301, -1262.301] - loss: 3168915.750 - mae: 48405.953 - mean_q: 65224.465 Interval 3600 (1799500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0492 Interval 3601 (1800000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.1228 4 episodes - episode_reward: -649.937 [-1718.606, -100.000] - loss: 3564494.000 - mae: 46548.453 - mean_q: 62712.641 Interval 3602 (1800500 steps 
performed) 500/500 [==============================] - 3s 6ms/step - reward: -3.7558 2 episodes - episode_reward: -388.519 [-504.316, -272.722] - loss: 3130752.750 - mae: 46121.836 - mean_q: 62119.176 Interval 3603 (1801000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -1.3403 2 episodes - episode_reward: -665.103 [-1230.206, -100.000] - loss: 3388370.750 - mae: 45205.805 - mean_q: 60876.344 Interval 3604 (1801500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.2192 1 episodes - episode_reward: -608.213 [-608.213, -608.213] - loss: 3316376.250 - mae: 44513.961 - mean_q: 59925.105 Interval 3605 (1802000 steps performed) 500/500 [==============================] - 3s 6ms/step - reward: -0.6118 Interval 3606 (1802500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6251 Interval 3607 (1803000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1449 Interval 3608 (1803500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.1476 Interval 3609 (1804000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.0942 Interval 3610 (1804500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: 0.1501 Interval 3611 (1805000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.0723 Interval 3612 (1805500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2378 Interval 3613 (1806000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1274 Interval 3614 (1806500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: 0.0332 Interval 3615 (1807000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.3011 1 episodes - episode_reward: -943.384 [-943.384, -943.384] - loss: 3865275.500 - mae: 
42484.316 - mean_q: 57014.711 Interval 3616 (1807500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: 0.1032 Interval 3617 (1808000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.0255 3 episodes - episode_reward: -156.325 [-185.314, -113.639] - loss: 4047250.750 - mae: 42710.668 - mean_q: 57318.219 Interval 3618 (1808500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0319 Interval 3619 (1809000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.4048 2 episodes - episode_reward: -181.642 [-223.638, -139.645] - loss: 3189052.000 - mae: 43088.297 - mean_q: 57856.777 Interval 3620 (1809500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: 0.0318 1 episodes - episode_reward: 0.358 [0.358, 0.358] - loss: 3836868.500 - mae: 43555.875 - mean_q: 58473.262 Interval 3621 (1810000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.8811: 0s - reward: 2 episodes - episode_reward: -179.902 [-210.492, -149.313] - loss: 3624331.750 - mae: 43458.426 - mean_q: 58316.227 Interval 3622 (1810500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3637 1 episodes - episode_reward: -140.068 [-140.068, -140.068] - loss: 3516517.000 - mae: 43696.270 - mean_q: 58595.934 Interval 3623 (1811000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9706 3 episodes - episode_reward: -163.172 [-318.669, -23.016] - loss: 3415010.750 - mae: 43607.145 - mean_q: 58509.891 Interval 3624 (1811500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8854 1 episodes - episode_reward: -342.889 [-342.889, -342.889] - loss: 3225853.750 - mae: 43642.246 - mean_q: 58539.051 Interval 3625 (1812000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.6239 1 episodes - 
episode_reward: -397.071 [-397.071, -397.071] - loss: 5609394.500 - mae: 43731.098 - mean_q: 58632.738 Interval 3626 (1812500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1819 Interval 3627 (1813000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.3380 Interval 3628 (1813500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.1704 3 episodes - episode_reward: -482.775 [-534.240, -399.813] - loss: 3022970.250 - mae: 43903.820 - mean_q: 58885.688 Interval 3629 (1814000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6635 2 episodes - episode_reward: -115.689 [-127.982, -103.396] - loss: 4299068.000 - mae: 43856.309 - mean_q: 58713.172 Interval 3630 (1814500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -1.8384 4 episodes - episode_reward: -240.394 [-535.641, -111.415] - loss: 3084912.750 - mae: 43628.801 - mean_q: 58442.121 Interval 3631 (1815000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0313 4 episodes - episode_reward: -125.994 [-162.525, -103.004] - loss: 3966494.500 - mae: 42785.867 - mean_q: 57226.934 Interval 3632 (1815500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5395 Interval 3633 (1816000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7176 1 episodes - episode_reward: -1118.270 [-1118.270, -1118.270] - loss: 4345140.500 - mae: 42372.664 - mean_q: 56620.605 Interval 3634 (1816500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.2048 2 episodes - episode_reward: -367.910 [-507.214, -228.606] - loss: 2564940.000 - mae: 41705.070 - mean_q: 55817.160 Interval 3635 (1817000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.5386 5 episodes - episode_reward: -404.401 [-864.081, -118.558] - loss: 
3828268.250 - mae: 41563.652 - mean_q: 55590.605 Interval 3636 (1817500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6112 Interval 3637 (1818000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0734 2 episodes - episode_reward: -439.029 [-647.347, -230.712] - loss: 2313070.000 - mae: 40573.668 - mean_q: 54364.336 Interval 3638 (1818500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4255 Interval 3639 (1819000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3932 2 episodes - episode_reward: -384.712 [-483.698, -285.727] - loss: 2615478.250 - mae: 40125.234 - mean_q: 53753.281 Interval 3640 (1819500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.6097 4 episodes - episode_reward: -323.895 [-481.602, -175.320] - loss: 3669739.500 - mae: 40015.508 - mean_q: 53615.121 Interval 3641 (1820000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6475 3 episodes - episode_reward: -322.543 [-390.283, -209.073] - loss: 2027823.500 - mae: 40100.090 - mean_q: 53801.855 Interval 3642 (1820500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.2529 5 episodes - episode_reward: -251.211 [-480.940, -127.012] - loss: 5879918.500 - mae: 39203.078 - mean_q: 52610.191 Interval 3643 (1821000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1743 1 episodes - episode_reward: -474.512 [-474.512, -474.512] - loss: 2425791.750 - mae: 39355.441 - mean_q: 52922.691 Interval 3644 (1821500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1834 3 episodes - episode_reward: -553.305 [-662.887, -428.881] - loss: 3117532.250 - mae: 39488.793 - mean_q: 53162.438 Interval 3645 (1822000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4766 3 
episodes - episode_reward: -379.148 [-489.920, -188.501] - loss: 3659968.750 - mae: 38766.113 - mean_q: 52228.664 Interval 3646 (1822500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.4638 5 episodes - episode_reward: -464.990 [-641.772, -206.329] - loss: 2422814.500 - mae: 38779.023 - mean_q: 52287.801 Interval 3647 (1823000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.2461 3 episodes - episode_reward: -424.029 [-619.776, -201.556] - loss: 3872468.250 - mae: 38567.855 - mean_q: 51944.496 Interval 3648 (1823500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.4790 5 episodes - episode_reward: -370.961 [-577.199, -218.055] - loss: 2990793.750 - mae: 38408.793 - mean_q: 51744.613 Interval 3649 (1824000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.7815 5 episodes - episode_reward: -332.083 [-448.420, -195.412] - loss: 2263186.000 - mae: 37654.621 - mean_q: 50700.074 Interval 3650 (1824500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.9339 4 episodes - episode_reward: -480.701 [-628.183, -382.048] - loss: 2334859.250 - mae: 37027.969 - mean_q: 49873.402 Interval 3651 (1825000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.2541 Interval 3652 (1825500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.4939 3 episodes - episode_reward: -427.312 [-932.295, -100.000] - loss: 2633217.250 - mae: 36765.902 - mean_q: 49550.832 Interval 3653 (1826000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.9172 4 episodes - episode_reward: -458.494 [-1323.963, -125.088] - loss: 2029591.875 - mae: 37710.324 - mean_q: 50797.242 Interval 3654 (1826500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0381 3 episodes - episode_reward: -532.313 [-769.510, 
-341.743] - loss: 1972860.250 - mae: 36617.176 - mean_q: 49313.676 Interval 3655 (1827000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4873 3 episodes - episode_reward: -475.362 [-548.213, -344.079] - loss: 2345020.500 - mae: 36715.391 - mean_q: 49374.941 Interval 3656 (1827500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.5472 3 episodes - episode_reward: -569.874 [-1087.345, -231.737] - loss: 2141206.000 - mae: 35517.473 - mean_q: 47757.656 Interval 3657 (1828000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2759 1 episodes - episode_reward: -379.256 [-379.256, -379.256] - loss: 3145973.500 - mae: 35896.043 - mean_q: 48258.535 Interval 3658 (1828500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.2545 2 episodes - episode_reward: -1232.699 [-1350.549, -1114.850] - loss: 1843769.500 - mae: 35493.211 - mean_q: 47713.066 Interval 3659 (1829000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.6139 6 episodes - episode_reward: -296.403 [-821.179, -9.868] - loss: 3690499.500 - mae: 35653.668 - mean_q: 47900.254 Interval 3660 (1829500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -5.0230 1 episodes - episode_reward: -2187.242 [-2187.242, -2187.242] - loss: 1861415.625 - mae: 35722.242 - mean_q: 48035.832 Interval 3661 (1830000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.3882 4 episodes - episode_reward: -618.858 [-958.315, -344.383] - loss: 3209335.500 - mae: 35204.930 - mean_q: 47312.859 Interval 3662 (1830500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.9594 3 episodes - episode_reward: -196.751 [-241.185, -135.216] - loss: 2331061.000 - mae: 35947.723 - mean_q: 48313.895 Interval 3663 (1831000 steps performed) 500/500 [==============================] - 4s 7ms/step 
- reward: -3.7007 7 episodes - episode_reward: -318.132 [-738.521, -120.714] - loss: 1624412.250 - mae: 35667.648 - mean_q: 47992.188 Interval 3664 (1831500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.5431 8 episodes - episode_reward: -349.261 [-688.102, -100.000] - loss: 5482870.000 - mae: 36474.367 - mean_q: 49037.723 Interval 3665 (1832000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.6609 3 episodes - episode_reward: -589.911 [-933.865, -127.426] - loss: 2555718.000 - mae: 36238.336 - mean_q: 48811.672 Interval 3666 (1832500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.3056 3 episodes - episode_reward: -547.342 [-840.046, -380.412] - loss: 4310698.500 - mae: 36873.156 - mean_q: 49536.125 Interval 3667 (1833000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.6298 5 episodes - episode_reward: -360.517 [-681.155, -160.936] - loss: 2092602.250 - mae: 37901.730 - mean_q: 51031.230 Interval 3668 (1833500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.8380 2 episodes - episode_reward: -618.262 [-831.125, -405.399] - loss: 1957961.000 - mae: 38698.430 - mean_q: 52136.055 Interval 3669 (1834000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.7794 7 episodes - episode_reward: -419.975 [-1093.161, -128.793] - loss: 7812130.500 - mae: 39789.836 - mean_q: 53526.441 Interval 3670 (1834500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.1663 3 episodes - episode_reward: -557.983 [-641.407, -475.384] - loss: 2774445.000 - mae: 40577.816 - mean_q: 54673.207 Interval 3671 (1835000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.0578 5 episodes - episode_reward: -400.254 [-794.525, -131.149] - loss: 5224099.500 - mae: 41727.930 - mean_q: 56182.422 Interval 3672 (1835500 steps 
performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.1106 6 episodes - episode_reward: -371.352 [-636.766, -122.809] - loss: 8585063.000 - mae: 43211.625 - mean_q: 58215.172 Interval 3673 (1836000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.3432 5 episodes - episode_reward: -425.653 [-610.428, -100.000] - loss: 10311401.000 - mae: 43922.496 - mean_q: 59142.652 Interval 3674 (1836500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.4045 4 episodes - episode_reward: -420.770 [-605.275, -119.566] - loss: 7891463.000 - mae: 45880.273 - mean_q: 61819.645 Interval 3675 (1837000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.7505 5 episodes - episode_reward: -390.595 [-569.851, -282.755] - loss: 11576156.000 - mae: 45971.766 - mean_q: 61901.578 Interval 3676 (1837500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4872 4 episodes - episode_reward: -431.815 [-614.391, -241.211] - loss: 2858974.250 - mae: 45256.852 - mean_q: 61140.730 Interval 3677 (1838000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.8355 3 episodes - episode_reward: -469.119 [-642.357, -270.709] - loss: 12699060.000 - mae: 46527.477 - mean_q: 62746.273 Interval 3678 (1838500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.6446 4 episodes - episode_reward: -458.816 [-746.428, -245.983] - loss: 3498947.250 - mae: 47163.473 - mean_q: 63753.281 Interval 3679 (1839000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.4691 4 episodes - episode_reward: -298.821 [-492.714, -211.589] - loss: 3974764.500 - mae: 49519.426 - mean_q: 66897.812 Interval 3680 (1839500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.7110 5 episodes - episode_reward: -252.557 [-408.209, -128.773] - loss: 14977921.000 
- mae: 48908.027 - mean_q: 65958.508 Interval 3681 (1840000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.7760 3 episodes - episode_reward: -507.760 [-716.535, -315.275] - loss: 18478524.000 - mae: 47913.527 - mean_q: 64540.672 Interval 3682 (1840500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.9703 4 episodes - episode_reward: -479.104 [-621.897, -170.480] - loss: 2941192.000 - mae: 49131.672 - mean_q: 66308.266 Interval 3683 (1841000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.3291 6 episodes - episode_reward: -376.032 [-670.445, -120.643] - loss: 44582556.000 - mae: 51338.824 - mean_q: 68997.609 Interval 3684 (1841500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.0584 4 episodes - episode_reward: -369.634 [-579.749, -218.429] - loss: 12281942.000 - mae: 49085.375 - mean_q: 66254.156 Interval 3685 (1842000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4765 4 episodes - episode_reward: -300.927 [-613.790, -137.543] - loss: 12590454.000 - mae: 46007.684 - mean_q: 62148.391 Interval 3686 (1842500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.8302 5 episodes - episode_reward: -496.188 [-941.234, -122.139] - loss: 31617050.000 - mae: 47579.520 - mean_q: 64123.129 Interval 3687 (1843000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.3668 6 episodes - episode_reward: -362.097 [-591.086, -120.807] - loss: 15180303.000 - mae: 44975.688 - mean_q: 60643.457 Interval 3688 (1843500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -6.5569 7 episodes - episode_reward: -462.462 [-1190.460, -100.000] - loss: 21311418.000 - mae: 42974.691 - mean_q: 57988.812 Interval 3689 (1844000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.7935 5 
episodes - episode_reward: -174.392 [-272.124, -106.689] - loss: 4766443.500 - mae: 45131.234 - mean_q: 61182.574 Interval 3690 (1844500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.6706 4 episodes - episode_reward: -406.676 [-642.379, -85.063] - loss: 27417622.000 - mae: 46499.531 - mean_q: 62895.875 Interval 3691 (1845000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.4245 6 episodes - episode_reward: -407.499 [-1001.687, -100.182] - loss: 17369184.000 - mae: 42048.438 - mean_q: 56928.352 Interval 3692 (1845500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.5946 4 episodes - episode_reward: -323.842 [-506.496, -128.413] - loss: 5912804.500 - mae: 41438.906 - mean_q: 56201.176 Interval 3693 (1846000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.3009 4 episodes - episode_reward: -400.319 [-593.966, -202.116] - loss: 6867910.000 - mae: 42213.633 - mean_q: 57275.988 Interval 3694 (1846500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9503 6 episodes - episode_reward: -252.949 [-363.031, -110.580] - loss: 4136370.250 - mae: 40789.297 - mean_q: 55377.457 Interval 3695 (1847000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -8.8363 4 episodes - episode_reward: -1119.089 [-1880.324, -206.425] - loss: 13100914.000 - mae: 39297.023 - mean_q: 53161.965 Interval 3696 (1847500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.2180 3 episodes - episode_reward: -840.565 [-2175.060, -167.738] - loss: 3852315.000 - mae: 37853.074 - mean_q: 51279.422 Interval 3697 (1848000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.6665 3 episodes - episode_reward: -427.481 [-544.438, -276.384] - loss: 4142273.250 - mae: 36782.707 - mean_q: 49891.031 Interval 3698 (1848500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -6.5315 3 episodes - episode_reward: -1130.065 [-1880.545, -374.480] - loss: 4518162.500 - mae: 36548.547 - mean_q: 49533.859 Interval 3699 (1849000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1353 2 episodes - episode_reward: -703.741 [-788.795, -618.687] - loss: 3497614.250 - mae: 35578.074 - mean_q: 48215.359 Interval 3700 (1849500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.8655 5 episodes - episode_reward: -296.894 [-591.743, -71.296] - loss: 5109319.500 - mae: 34683.176 - mean_q: 46910.738 Interval 3701 (1850000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1194 3 episodes - episode_reward: -509.356 [-728.036, -286.352] - loss: 4854302.500 - mae: 35387.281 - mean_q: 47807.254 Interval 3702 (1850500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.4342 5 episodes - episode_reward: -430.886 [-1102.607, -142.680] - loss: 3596356.250 - mae: 35492.102 - mean_q: 47931.188 Interval 3703 (1851000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.7799 4 episodes - episode_reward: -339.693 [-420.431, -241.581] - loss: 5096860.000 - mae: 34619.469 - mean_q: 46699.809 Interval 3704 (1851500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.2266 5 episodes - episode_reward: -294.201 [-384.380, -151.233] - loss: 3263130.500 - mae: 33054.277 - mean_q: 44531.910 Interval 3705 (1852000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.8392 10 episodes - episode_reward: -330.584 [-494.612, -129.395] - loss: 3949335.250 - mae: 32978.414 - mean_q: 44369.473 Interval 3706 (1852500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.3849 4 episodes - episode_reward: -355.463 [-623.487, -114.712] - loss: 3556512.250 - mae: 31155.137 - 
mean_q: 41909.664 Interval 3707 (1853000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.1801 4 episodes - episode_reward: -543.109 [-677.016, -448.324] - loss: 4106627.000 - mae: 29604.572 - mean_q: 39840.105 Interval 3708 (1853500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.3811 3 episodes - episode_reward: -629.583 [-796.199, -476.529] - loss: 3618204.000 - mae: 28005.414 - mean_q: 37701.445 Interval 3709 (1854000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.6369 5 episodes - episode_reward: -358.028 [-433.147, -236.041] - loss: 4317035.000 - mae: 26948.330 - mean_q: 36265.488 Interval 3710 (1854500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0170 2 episodes - episode_reward: -512.946 [-552.605, -473.287] - loss: 4055155.750 - mae: 25400.279 - mean_q: 34289.695 Interval 3711 (1855000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.0010 3 episodes - episode_reward: -632.203 [-733.794, -557.148] - loss: 3589086.500 - mae: 24410.547 - mean_q: 33075.809 Interval 3712 (1855500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0372 4 episodes - episode_reward: -402.010 [-541.065, -249.495] - loss: 4202826.000 - mae: 24114.873 - mean_q: 32770.926 Interval 3713 (1856000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.4632 2 episodes - episode_reward: -611.990 [-1004.995, -218.985] - loss: 3723338.000 - mae: 23585.840 - mean_q: 32078.160 Interval 3714 (1856500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.2811 2 episodes - episode_reward: -846.340 [-869.882, -822.797] - loss: 3406290.750 - mae: 24108.055 - mean_q: 32836.316 Interval 3715 (1857000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.6174 2 episodes - episode_reward: 
-841.018 [-861.814, -820.222] - loss: 4335706.500 - mae: 24834.057 - mean_q: 33841.531 Interval 3716 (1857500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4447 3 episodes - episode_reward: -477.521 [-730.295, -341.683] - loss: 3611063.500 - mae: 24573.607 - mean_q: 33479.648 Interval 3717 (1858000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.4977 4 episodes - episode_reward: -610.908 [-966.729, -194.367] - loss: 4357036.000 - mae: 25118.359 - mean_q: 34181.043 Interval 3718 (1858500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.5672 5 episodes - episode_reward: -392.927 [-648.161, -100.000] - loss: 6250975.000 - mae: 25272.531 - mean_q: 34332.969 Interval 3719 (1859000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.0293 4 episodes - episode_reward: -460.721 [-780.396, -317.125] - loss: 4989001.500 - mae: 24883.859 - mean_q: 33745.102 Interval 3720 (1859500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.7403 6 episodes - episode_reward: -337.750 [-521.910, -191.495] - loss: 3659653.000 - mae: 25312.408 - mean_q: 34346.262 Interval 3721 (1860000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.3375 2 episodes - episode_reward: -438.179 [-526.864, -349.495] - loss: 14614004.000 - mae: 26175.084 - mean_q: 35376.180 Interval 3722 (1860500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.5977 4 episodes - episode_reward: -508.908 [-1013.508, -161.571] - loss: 8357703.000 - mae: 25627.773 - mean_q: 34713.930 Interval 3723 (1861000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1253 2 episodes - episode_reward: -813.865 [-1126.104, -501.626] - loss: 4689778.500 - mae: 25758.773 - mean_q: 34963.152 Interval 3724 (1861500 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -3.1648 4 episodes - episode_reward: -399.496 [-590.488, -193.517] - loss: 4688293.000 - mae: 25756.697 - mean_q: 34937.148 Interval 3725 (1862000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.5750 2 episodes - episode_reward: -649.871 [-821.377, -478.366] - loss: 5021822.000 - mae: 26604.947 - mean_q: 36156.723 Interval 3726 (1862500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.2905 2 episodes - episode_reward: -580.607 [-758.981, -402.233] - loss: 4290375.000 - mae: 27150.363 - mean_q: 36952.352 Interval 3727 (1863000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8457 Interval 3728 (1863500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2777 Interval 3729 (1864000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7492 1 episodes - episode_reward: -780.236 [-780.236, -780.236] - loss: 7494269.000 - mae: 30499.982 - mean_q: 41636.062 Interval 3730 (1864500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.8636 3 episodes - episode_reward: -370.495 [-492.004, -284.158] - loss: 4076141.750 - mae: 32739.508 - mean_q: 44737.180 Interval 3731 (1865000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.3705 3 episodes - episode_reward: -594.247 [-1015.512, -367.591] - loss: 8174345.500 - mae: 33052.711 - mean_q: 45100.398 Interval 3732 (1865500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8602 Interval 3733 (1866000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.8269 1 episodes - episode_reward: -838.217 [-838.217, -838.217] - loss: 7023965.000 - mae: 36747.992 - mean_q: 49951.238 Interval 3734 (1866500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: 
-1.1157 Interval 3735 (1867000 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.9177 1 episodes - episode_reward: -1368.385 [-1368.385, -1368.385] - loss: 6751311.000 - mae: 38607.156 - mean_q: 52452.531 Interval 3736 (1867500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.0913 1 episodes - episode_reward: -127.465 [-127.465, -127.465] - loss: 4923758.500 - mae: 39361.379 - mean_q: 53498.699 Interval 3737 (1868000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.0513 2 episodes - episode_reward: -978.348 [-1737.382, -219.315] - loss: 4975548.500 - mae: 40724.754 - mean_q: 55292.883 Interval 3738 (1868500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8457 2 episodes - episode_reward: -314.714 [-469.266, -160.161] - loss: 7695796.500 - mae: 39513.184 - mean_q: 53665.121 Interval 3739 (1869000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.7241 2 episodes - episode_reward: -746.597 [-1114.138, -379.055] - loss: 5478989.500 - mae: 39793.699 - mean_q: 54161.152 Interval 3740 (1869500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.7192 4 episodes - episode_reward: -355.114 [-766.435, -143.145] - loss: 4858284.500 - mae: 40926.242 - mean_q: 55694.758 Interval 3741 (1870000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1193 3 episodes - episode_reward: -554.716 [-878.929, -241.900] - loss: 5195946.000 - mae: 41679.238 - mean_q: 56704.129 Interval 3742 (1870500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.1550 4 episodes - episode_reward: -397.076 [-858.488, -101.696] - loss: 6273013.000 - mae: 43276.402 - mean_q: 58894.555 Interval 3743 (1871000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5174 1 episodes - episode_reward: -393.310 
[-393.310, -393.310] - loss: 6501269.500 - mae: 44694.180 - mean_q: 60848.043 Interval 3744 (1871500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.5350 2 episodes - episode_reward: -803.191 [-951.272, -655.111] - loss: 5890025.000 - mae: 45799.812 - mean_q: 62421.152 Interval 3745 (1872000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.8017 2 episodes - episode_reward: -237.039 [-374.078, -100.000] - loss: 7222551.000 - mae: 46176.504 - mean_q: 62929.871 Interval 3746 (1872500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -0.9697 Interval 3747 (1873000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -1.0534 Interval 3748 (1873500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.7645 1 episodes - episode_reward: -1552.539 [-1552.539, -1552.539] - loss: 7086759.000 - mae: 51601.941 - mean_q: 70299.672 Interval 3749 (1874000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3602 Interval 3750 (1874500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3738 1 episodes - episode_reward: -1677.260 [-1677.260, -1677.260] - loss: 8036856.000 - mae: 55884.758 - mean_q: 76063.539 Interval 3751 (1875000 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -1.3561 Interval 3752 (1875500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -2.1249 1 episodes - episode_reward: -2212.431 [-2212.431, -2212.431] - loss: 6805450.000 - mae: 58266.531 - mean_q: 79304.391 Interval 3753 (1876000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.7132 1 episodes - episode_reward: -720.347 [-720.347, -720.347] - loss: 8804175.000 - mae: 59179.535 - mean_q: 80528.945 Interval 3754 (1876500 steps performed) 500/500 [==============================] - 4s 
8ms/step - reward: -1.4833 1 episodes - episode_reward: -867.992 [-867.992, -867.992] - loss: 7722408.500 - mae: 60629.602 - mean_q: 82564.422 Interval 3755 (1877000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1394 1 episodes - episode_reward: -320.114 [-320.114, -320.114] - loss: 11095404.000 - mae: 61325.289 - mean_q: 83489.938 Interval 3756 (1877500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5521 1 episodes - episode_reward: -679.076 [-679.076, -679.076] - loss: 7104364.000 - mae: 62221.281 - mean_q: 84759.602 Interval 3757 (1878000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.1754 Interval 3758 (1878500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5423 1 episodes - episode_reward: -1542.732 [-1542.732, -1542.732] - loss: 8619916.000 - mae: 63825.227 - mean_q: 86823.211 Interval 3759 (1879000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -2.8765 1 episodes - episode_reward: -1934.114 [-1934.114, -1934.114] - loss: 11369180.000 - mae: 64048.668 - mean_q: 87051.789 Interval 3760 (1879500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4621 1 episodes - episode_reward: -443.342 [-443.342, -443.342] - loss: 9770433.000 - mae: 64509.207 - mean_q: 87639.906 Interval 3761 (1880000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1204 Interval 3762 (1880500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.4459 1 episodes - episode_reward: -2433.226 [-2433.226, -2433.226] - loss: 7872800.500 - mae: 65084.602 - mean_q: 88420.961 Interval 3763 (1881000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.3756 1 episodes - episode_reward: -499.885 [-499.885, -499.885] - loss: 14361222.000 - mae: 64955.641 - mean_q: 88164.430 Interval 3764 
(1881500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.0841 1 episodes - episode_reward: -1637.630 [-1637.630, -1637.630] - loss: 7713363.000 - mae: 66220.180 - mean_q: 89925.914 Interval 3765 (1882000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3472 Interval 3766 (1882500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.8344 1 episodes - episode_reward: -2151.492 [-2151.492, -2151.492] - loss: 7077176.000 - mae: 68445.359 - mean_q: 93014.797 Interval 3767 (1883000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7082 4 episodes - episode_reward: -301.949 [-693.462, -100.565] - loss: 7513980.500 - mae: 68336.320 - mean_q: 92974.242 Interval 3768 (1883500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.8129 1 episodes - episode_reward: -1029.469 [-1029.469, -1029.469] - loss: 7839197.000 - mae: 68873.156 - mean_q: 93770.719 Interval 3769 (1884000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.5506 5 episodes - episode_reward: -434.078 [-904.953, -100.000] - loss: 9784973.000 - mae: 71486.016 - mean_q: 97215.320 Interval 3770 (1884500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.4474 1 episodes - episode_reward: -633.709 [-633.709, -633.709] - loss: 8682377.000 - mae: 72531.555 - mean_q: 98792.594 Interval 3771 (1885000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0857 2 episodes - episode_reward: -675.169 [-833.168, -517.169] - loss: 10319366.000 - mae: 74167.680 - mean_q: 101052.609 Interval 3772 (1885500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.9261 2 episodes - episode_reward: -514.483 [-522.296, -506.670] - loss: 9969890.000 - mae: 76723.820 - mean_q: 104476.312 Interval 3773 (1886000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -1.5023 Interval 3774 (1886500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.5340 3 episodes - episode_reward: -806.561 [-1246.352, -123.656] - loss: 13232904.000 - mae: 79271.656 - mean_q: 107843.031 Interval 3775 (1887000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7927 Interval 3776 (1887500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.6657 2 episodes - episode_reward: -1008.524 [-1361.766, -655.281] - loss: 9734070.000 - mae: 82646.297 - mean_q: 112217.086 Interval 3777 (1888000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.0744 1 episodes - episode_reward: -1765.673 [-1765.673, -1765.673] - loss: 10511770.000 - mae: 83327.383 - mean_q: 113070.172 Interval 3778 (1888500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2090 Interval 3779 (1889000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0223 3 episodes - episode_reward: -599.583 [-977.926, -308.964] - loss: 11351210.000 - mae: 84495.680 - mean_q: 114406.992 Interval 3780 (1889500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9223 Interval 3781 (1890000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.7177 2 episodes - episode_reward: -2114.851 [-2388.825, -1840.877] - loss: 9730459.000 - mae: 83010.578 - mean_q: 112177.930 Interval 3782 (1890500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.2677 4 episodes - episode_reward: -564.657 [-1039.230, -150.335] - loss: 10473879.000 - mae: 81023.391 - mean_q: 109396.438 Interval 3783 (1891000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.3846 2 episodes - episode_reward: -1116.560 [-1445.637, -787.482] - loss: 10618335.000 - mae: 
79326.383 - mean_q: 107051.344 Interval 3784 (1891500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4294 2 episodes - episode_reward: -430.339 [-639.170, -221.508] - loss: 9893014.000 - mae: 77976.062 - mean_q: 105241.383 Interval 3785 (1892000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.5660 3 episodes - episode_reward: -295.495 [-569.713, -133.962] - loss: 9238094.000 - mae: 76799.023 - mean_q: 103637.188 Interval 3786 (1892500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.3722 4 episodes - episode_reward: -610.598 [-1037.198, -223.962] - loss: 10390087.000 - mae: 75579.102 - mean_q: 102050.156 Interval 3787 (1893000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.5934 4 episodes - episode_reward: -400.361 [-1325.146, -76.298] - loss: 9141592.000 - mae: 74648.117 - mean_q: 100829.945 Interval 3788 (1893500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.4109 3 episodes - episode_reward: -892.756 [-1576.508, -528.825] - loss: 7918299.000 - mae: 74843.469 - mean_q: 101065.875 Interval 3789 (1894000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.3519 5 episodes - episode_reward: -480.600 [-730.079, -182.127] - loss: 9801388.000 - mae: 73182.602 - mean_q: 98814.258 Interval 3790 (1894500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.6276 3 episodes - episode_reward: -465.828 [-957.408, -158.816] - loss: 9730029.000 - mae: 71925.352 - mean_q: 97148.664 Interval 3791 (1895000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9013 1 episodes - episode_reward: -561.117 [-561.117, -561.117] - loss: 9230277.000 - mae: 73071.914 - mean_q: 98742.664 Interval 3792 (1895500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.7553 Interval 3793 
(1896000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.6618 3 episodes - episode_reward: -780.571 [-1504.391, -407.666] - loss: 8720279.000 - mae: 74073.250 - mean_q: 100011.180 Interval 3794 (1896500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1550 3 episodes - episode_reward: -697.677 [-1298.585, -265.781] - loss: 9682160.000 - mae: 73525.094 - mean_q: 99251.734 Interval 3795 (1897000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0119 1 episodes - episode_reward: -539.365 [-539.365, -539.365] - loss: 11502288.000 - mae: 74232.531 - mean_q: 100132.414 Interval 3796 (1897500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4736 2 episodes - episode_reward: -867.009 [-1377.578, -356.439] - loss: 13322594.000 - mae: 76324.984 - mean_q: 102884.289 Interval 3797 (1898000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.6786 2 episodes - episode_reward: -983.240 [-1150.370, -816.110] - loss: 11297266.000 - mae: 76205.523 - mean_q: 102674.797 Interval 3798 (1898500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0940 1 episodes - episode_reward: -749.046 [-749.046, -749.046] - loss: 15578607.000 - mae: 76363.406 - mean_q: 102697.859 Interval 3799 (1899000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.1788 2 episodes - episode_reward: -172.546 [-197.577, -147.516] - loss: 15026956.000 - mae: 76488.797 - mean_q: 102798.352 Interval 3800 (1899500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.2478 Interval 3801 (1900000 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.1818 Interval 3802 (1900500 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.2666 Interval 3803 (1901000 steps performed) 500/500 
[==============================] - 7s 14ms/step - reward: -0.0954 Interval 3804 (1901500 steps performed) 500/500 [==============================] - 7s 15ms/step - reward: -0.5120 Interval 3805 (1902000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.1647 1 episodes - episode_reward: -628.913 [-628.913, -628.913] - loss: 10704198.000 - mae: 79016.281 - mean_q: 106174.938 Interval 3806 (1902500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2755 1 episodes - episode_reward: -774.809 [-774.809, -774.809] - loss: 9003023.000 - mae: 78316.422 - mean_q: 105217.711 Interval 3807 (1903000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.7995 1 episodes - episode_reward: -201.039 [-201.039, -201.039] - loss: 11967820.000 - mae: 78314.703 - mean_q: 105214.211 Interval 3808 (1903500 steps performed) 500/500 [==============================] - 5s 9ms/step - reward: -0.2092 Interval 3809 (1904000 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -0.2511 Interval 3810 (1904500 steps performed) 500/500 [==============================] - 6s 11ms/step - reward: -0.2347 Interval 3811 (1905000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2012 Interval 3812 (1905500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.3539 Interval 3813 (1906000 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.0843 Interval 3814 (1906500 steps performed) 500/500 [==============================] - 6s 12ms/step - reward: -0.2585 Interval 3815 (1907000 steps performed) 500/500 [==============================] - 6s 13ms/step - reward: -0.1025 Interval 3816 (1907500 steps performed) 500/500 [==============================] - 7s 14ms/step - reward: -0.1117 Interval 3817 (1908000 steps performed) 500/500 [==============================] - 7s 14ms/step - 
reward: -0.3781 Interval 3818 (1908500 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.1642 Interval 3819 (1909000 steps performed) 500/500 [==============================] - 8s 15ms/step - reward: -0.1867 Interval 3820 (1909500 steps performed) 500/500 [==============================] - 8s 16ms/step - reward: -0.1552 Interval 3821 (1910000 steps performed) 500/500 [==============================] - 8s 17ms/step - reward: -0.1482 Interval 3822 (1910500 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.0693 Interval 3823 (1911000 steps performed) 500/500 [==============================] - 9s 18ms/step - reward: -0.3484 Interval 3824 (1911500 steps performed) 500/500 [==============================] - 10s 20ms/step - reward: -0.2357 Interval 3825 (1912000 steps performed) 500/500 [==============================] - 10s 19ms/step - reward: -0.0489 Interval 3826 (1912500 steps performed) 500/500 [==============================] - 5s 11ms/step - reward: -1.0903 1 episodes - episode_reward: -2047.183 [-2047.183, -2047.183] - loss: 8258608.500 - mae: 68718.586 - mean_q: 92302.156 Interval 3827 (1913000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.6163 1 episodes - episode_reward: -648.300 [-648.300, -648.300] - loss: 7015750.000 - mae: 68150.438 - mean_q: 91627.805 Interval 3828 (1913500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.3648 2 episodes - episode_reward: -1147.984 [-1546.053, -749.914] - loss: 12351014.000 - mae: 66712.727 - mean_q: 89651.445 Interval 3829 (1914000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.1893 1 episodes - episode_reward: -1638.142 [-1638.142, -1638.142] - loss: 11090383.000 - mae: 66275.875 - mean_q: 89184.078 Interval 3830 (1914500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3176 2 episodes - episode_reward: 
-434.602 [-535.538, -333.667] - loss: 10502509.000 - mae: 65885.766 - mean_q: 88770.289 Interval 3831 (1915000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.0286 4 episodes - episode_reward: -276.775 [-436.596, -175.537] - loss: 11350404.000 - mae: 66694.938 - mean_q: 89901.109 Interval 3832 (1915500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.7838 2 episodes - episode_reward: -725.878 [-998.379, -453.378] - loss: 11144618.000 - mae: 66487.766 - mean_q: 89635.039 Interval 3833 (1916000 steps performed) 500/500 [==============================] - ETA: 0s - reward: -1.95 - 4s 7ms/step - reward: -1.9882 2 episodes - episode_reward: -427.592 [-433.562, -421.623] - loss: 16725119.000 - mae: 67867.766 - mean_q: 91527.391 Interval 3834 (1916500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.2265 3 episodes - episode_reward: -469.093 [-842.477, -100.000] - loss: 11856265.000 - mae: 67296.562 - mean_q: 90786.906 Interval 3835 (1917000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.0899 3 episodes - episode_reward: -499.459 [-714.148, -197.705] - loss: 11319357.000 - mae: 67279.352 - mean_q: 90837.352 Interval 3836 (1917500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6498 2 episodes - episode_reward: -323.427 [-523.869, -122.985] - loss: 12574328.000 - mae: 67903.711 - mean_q: 91708.016 Interval 3837 (1918000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6147 1 episodes - episode_reward: -875.053 [-875.053, -875.053] - loss: 12813162.000 - mae: 67575.336 - mean_q: 91276.422 Interval 3838 (1918500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.1997 Interval 3839 (1919000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.2477 3 episodes - episode_reward: -382.590 [-816.190, 
-106.834] - loss: 9079697.000 - mae: 67936.203 - mean_q: 91805.516 Interval 3840 (1919500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.3454 Interval 3841 (1920000 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -1.1398 Interval 3842 (1920500 steps performed) 500/500 [==============================] - 5s 10ms/step - reward: -0.6838 1 episodes - episode_reward: -1280.248 [-1280.248, -1280.248] - loss: 7733228.500 - mae: 69633.766 - mean_q: 94216.180 Interval 3843 (1921000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.7731 1 episodes - episode_reward: -205.936 [-205.936, -205.936] - loss: 7929980.000 - mae: 70409.438 - mean_q: 95276.227 Interval 3844 (1921500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.6447 2 episodes - episode_reward: -1697.454 [-2363.343, -1031.566] - loss: 8894328.000 - mae: 71490.109 - mean_q: 96772.602 Interval 3845 (1922000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4921 1 episodes - episode_reward: -402.509 [-402.509, -402.509] - loss: 10773275.000 - mae: 71996.102 - mean_q: 97570.625 Interval 3846 (1922500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.9562 3 episodes - episode_reward: -1119.733 [-2456.186, -450.782] - loss: 9047722.000 - mae: 73268.359 - mean_q: 99307.344 Interval 3847 (1923000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.4236 2 episodes - episode_reward: -730.515 [-985.785, -475.246] - loss: 7889736.500 - mae: 73790.812 - mean_q: 100128.719 Interval 3848 (1923500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.6086 3 episodes - episode_reward: -626.337 [-1027.824, -411.419] - loss: 10012968.000 - mae: 74173.422 - mean_q: 100520.242 Interval 3849 (1924000 steps performed) 500/500 [==============================] - 4s 
8ms/step - reward: -4.0543 3 episodes - episode_reward: -904.186 [-1073.233, -764.234] - loss: 13414582.000 - mae: 74664.547 - mean_q: 101010.070 Interval 3850 (1924500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.6226 4 episodes - episode_reward: -449.868 [-1006.534, -233.488] - loss: 11264254.000 - mae: 74620.797 - mean_q: 100951.781 Interval 3851 (1925000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.9405 2 episodes - episode_reward: -615.069 [-723.666, -506.472] - loss: 8954313.000 - mae: 74054.406 - mean_q: 100197.797 Interval 3852 (1925500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.1168 3 episodes - episode_reward: -585.328 [-722.353, -511.871] - loss: 14109644.000 - mae: 74327.219 - mean_q: 100454.523 Interval 3853 (1926000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.0192 3 episodes - episode_reward: -542.376 [-744.748, -229.275] - loss: 7375658.500 - mae: 73345.562 - mean_q: 99132.406 Interval 3854 (1926500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.0692 3 episodes - episode_reward: -511.977 [-617.781, -322.464] - loss: 15808060.000 - mae: 73139.336 - mean_q: 98757.086 Interval 3855 (1927000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.3853 6 episodes - episode_reward: -262.173 [-635.376, -114.750] - loss: 14638421.000 - mae: 72059.312 - mean_q: 97285.469 Interval 3856 (1927500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.2498 2 episodes - episode_reward: -289.254 [-295.916, -282.591] - loss: 26823838.000 - mae: 71586.289 - mean_q: 96593.547 Interval 3857 (1928000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.0720 3 episodes - episode_reward: -814.492 [-1813.552, -181.760] - loss: 12943963.000 - mae: 70765.078 - mean_q: 95399.633 Interval 
3858 (1928500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.2709 3 episodes - episode_reward: -293.887 [-495.994, -153.389] - loss: 10482562.000 - mae: 70681.938 - mean_q: 95348.820 Interval 3859 (1929000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.3308 2 episodes - episode_reward: -1001.098 [-1835.969, -166.228] - loss: 6592567.500 - mae: 69920.523 - mean_q: 94265.703 Interval 3860 (1929500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.3296 3 episodes - episode_reward: -830.100 [-1881.392, -214.974] - loss: 12674303.000 - mae: 70270.062 - mean_q: 94626.828 Interval 3861 (1930000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.6952 4 episodes - episode_reward: -491.898 [-1289.743, -117.156] - loss: 31333060.000 - mae: 68947.344 - mean_q: 92764.398 Interval 3862 (1930500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.5507 4 episodes - episode_reward: -373.891 [-449.415, -331.173] - loss: 8628740.000 - mae: 67270.289 - mean_q: 90539.000 Interval 3863 (1931000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.7234 1 episodes - episode_reward: -1127.772 [-1127.772, -1127.772] - loss: 13502974.000 - mae: 66863.844 - mean_q: 89930.156 Interval 3864 (1931500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.4184 1 episodes - episode_reward: -1690.646 [-1690.646, -1690.646] - loss: 7007007.500 - mae: 65721.688 - mean_q: 88352.258 Interval 3865 (1932000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.4147 3 episodes - episode_reward: -764.433 [-1774.190, -237.662] - loss: 11104868.000 - mae: 65081.316 - mean_q: 87474.125 Interval 3866 (1932500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9896 4 episodes - episode_reward: -370.091 
[-731.559, -161.017] - loss: 14541793.000 - mae: 63466.480 - mean_q: 85257.891 Interval 3867 (1933000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.8361 2 episodes - episode_reward: -319.365 [-347.416, -291.313] - loss: 6115902.000 - mae: 61823.785 - mean_q: 83078.250 Interval 3868 (1933500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.2497 4 episodes - episode_reward: -622.013 [-1788.528, -115.621] - loss: 9002720.000 - mae: 60217.758 - mean_q: 80875.336 Interval 3869 (1934000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.6116 1 episodes - episode_reward: -1188.077 [-1188.077, -1188.077] - loss: 6430928.500 - mae: 59621.633 - mean_q: 80105.070 Interval 3870 (1934500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4959 2 episodes - episode_reward: -605.375 [-1068.065, -142.685] - loss: 10817097.000 - mae: 58717.457 - mean_q: 78842.055 Interval 3871 (1935000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.3300 3 episodes - episode_reward: -602.411 [-1160.315, -278.505] - loss: 15007661.000 - mae: 56789.016 - mean_q: 76184.344 Interval 3872 (1935500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.8717 1 episodes - episode_reward: -2020.932 [-2020.932, -2020.932] - loss: 9954440.000 - mae: 55411.074 - mean_q: 74401.648 Interval 3873 (1936000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.1714 3 episodes - episode_reward: -280.663 [-365.416, -133.811] - loss: 6685695.000 - mae: 54068.605 - mean_q: 72635.555 Interval 3874 (1936500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.1663 6 episodes - episode_reward: -285.142 [-488.141, -142.663] - loss: 6543029.000 - mae: 52901.785 - mean_q: 71092.547 Interval 3875 (1937000 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -1.4853 1 episodes - episode_reward: -433.755 [-433.755, -433.755] - loss: 11791492.000 - mae: 52063.219 - mean_q: 70111.047 Interval 3876 (1937500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.6932 2 episodes - episode_reward: -884.081 [-972.868, -795.293] - loss: 3682514.500 - mae: 51566.020 - mean_q: 69590.859 Interval 3877 (1938000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3394 2 episodes - episode_reward: -274.981 [-287.290, -262.673] - loss: 4918047.000 - mae: 52154.008 - mean_q: 70485.727 Interval 3878 (1938500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5364 4 episodes - episode_reward: -209.270 [-301.590, -128.959] - loss: 10148089.000 - mae: 52403.457 - mean_q: 70833.883 Interval 3879 (1939000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.0568 2 episodes - episode_reward: -318.786 [-392.442, -245.131] - loss: 9769927.000 - mae: 53582.934 - mean_q: 72478.430 Interval 3880 (1939500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.7250 3 episodes - episode_reward: -242.790 [-280.044, -211.907] - loss: 9106934.000 - mae: 53636.367 - mean_q: 72495.969 Interval 3881 (1940000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.6582 3 episodes - episode_reward: -291.202 [-421.066, -128.994] - loss: 7674108.500 - mae: 53318.809 - mean_q: 71923.055 Interval 3882 (1940500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -5.1974 1 episodes - episode_reward: -1553.781 [-1553.781, -1553.781] - loss: 5942471.500 - mae: 53953.488 - mean_q: 72765.180 Interval 3883 (1941000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.2581 2 episodes - episode_reward: -1144.958 [-1385.380, -904.535] - loss: 14564904.000 - mae: 
53581.000 - mean_q: 72126.352 Interval 3884 (1941500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -5.0538 1 episodes - episode_reward: -2243.461 [-2243.461, -2243.461] - loss: 11718002.000 - mae: 52693.223 - mean_q: 70950.594 Interval 3885 (1942000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -5.3415 2 episodes - episode_reward: -1744.421 [-2656.021, -832.821] - loss: 6047063.500 - mae: 52260.758 - mean_q: 70338.133 Interval 3886 (1942500 steps performed) 500/500 [==============================] - 4s 9ms/step - reward: -5.0730 Interval 3887 (1943000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.3602 3 episodes - episode_reward: -1560.475 [-2614.659, -979.159] - loss: 8644399.000 - mae: 50563.086 - mean_q: 68082.133 Interval 3888 (1943500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.2008 1 episodes - episode_reward: -1226.710 [-1226.710, -1226.710] - loss: 15235539.000 - mae: 50337.270 - mean_q: 67723.914 Interval 3889 (1944000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.7432 3 episodes - episode_reward: -1068.131 [-1217.496, -901.629] - loss: 5981310.500 - mae: 50018.527 - mean_q: 67339.070 Interval 3890 (1944500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.0703 2 episodes - episode_reward: -361.857 [-658.477, -65.237] - loss: 5003596.000 - mae: 49280.379 - mean_q: 66360.758 Interval 3891 (1945000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.3957 5 episodes - episode_reward: -585.395 [-1218.420, -100.000] - loss: 8734255.000 - mae: 48663.273 - mean_q: 65479.930 Interval 3892 (1945500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.1501 8 episodes - episode_reward: -252.957 [-477.042, -99.321] - loss: 4998955.500 - mae: 48221.031 - mean_q: 64852.000 Interval 
3893 (1946000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.6894 2 episodes - episode_reward: -1071.693 [-2045.359, -98.026] - loss: 10373759.000 - mae: 46584.547 - mean_q: 62616.211 Interval 3894 (1946500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.9659 6 episodes - episode_reward: -529.675 [-1422.750, -100.000] - loss: 6888846.500 - mae: 45871.043 - mean_q: 61654.020 Interval 3895 (1947000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.9622 2 episodes - episode_reward: -831.880 [-992.501, -671.258] - loss: 7305807.500 - mae: 44600.664 - mean_q: 59998.699 Interval 3896 (1947500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.7418 7 episodes - episode_reward: -375.628 [-1062.781, -104.689] - loss: 9032633.000 - mae: 43641.281 - mean_q: 58671.012 Interval 3897 (1948000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.7435 4 episodes - episode_reward: -513.795 [-1143.464, -134.202] - loss: 3510831.750 - mae: 42546.293 - mean_q: 57285.391 Interval 3898 (1948500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -5.4442 5 episodes - episode_reward: -624.005 [-1391.466, -122.194] - loss: 5519371.000 - mae: 41991.062 - mean_q: 56540.496 Interval 3899 (1949000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.3453 4 episodes - episode_reward: -409.072 [-623.247, -173.882] - loss: 9322875.000 - mae: 41534.238 - mean_q: 55908.227 Interval 3900 (1949500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.2064 6 episodes - episode_reward: -365.147 [-681.660, -157.738] - loss: 4920731.000 - mae: 40936.277 - mean_q: 55189.020 Interval 3901 (1950000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.5029 2 episodes - episode_reward: -576.236 [-983.120, 
-169.351] - loss: 5274705.500 - mae: 40184.793 - mean_q: 54151.867 Interval 3902 (1950500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -5.4876 6 episodes - episode_reward: -543.112 [-1417.454, -188.653] - loss: 8781452.000 - mae: 39787.047 - mean_q: 53570.801 Interval 3903 (1951000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.4547 4 episodes - episode_reward: -257.678 [-529.675, -107.924] - loss: 6180014.500 - mae: 38882.691 - mean_q: 52335.762 Interval 3904 (1951500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4767 7 episodes - episode_reward: -368.885 [-1364.310, -100.000] - loss: 5811028.000 - mae: 38206.023 - mean_q: 51402.680 Interval 3905 (1952000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -3.1220 6 episodes - episode_reward: -211.993 [-592.223, -95.501] - loss: 5083331.500 - mae: 37349.574 - mean_q: 50189.629 Interval 3906 (1952500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4419 4 episodes - episode_reward: -510.058 [-1001.256, -104.564] - loss: 3528214.500 - mae: 36214.102 - mean_q: 48639.160 Interval 3907 (1953000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3471 4 episodes - episode_reward: -162.803 [-392.179, -50.349] - loss: 4006615.000 - mae: 35598.199 - mean_q: 47784.688 Interval 3908 (1953500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.8372 4 episodes - episode_reward: -351.610 [-1143.540, -35.575] - loss: 3873715.000 - mae: 34896.352 - mean_q: 46822.445 Interval 3909 (1954000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.8864 6 episodes - episode_reward: -240.834 [-641.413, -91.986] - loss: 3144205.750 - mae: 33890.578 - mean_q: 45445.617 Interval 3910 (1954500 steps performed) 500/500 [==============================] - 4s 7ms/step - 
reward: -2.1905 4 episodes - episode_reward: -214.856 [-423.873, -82.539] - loss: 3996208.250 - mae: 32799.102 - mean_q: 43921.398 Interval 3911 (1955000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4052 2 episodes - episode_reward: -428.855 [-461.961, -395.750] - loss: 2461635.250 - mae: 31739.721 - mean_q: 42508.285 Interval 3912 (1955500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.7498 2 episodes - episode_reward: -604.104 [-625.258, -582.951] - loss: 2682661.750 - mae: 30927.516 - mean_q: 41404.309 Interval 3913 (1956000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1767 3 episodes - episode_reward: -272.983 [-406.638, -181.819] - loss: 2825739.750 - mae: 29560.771 - mean_q: 39521.223 Interval 3914 (1956500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8859 2 episodes - episode_reward: -239.228 [-302.620, -175.837] - loss: 2586683.250 - mae: 28764.598 - mean_q: 38421.188 Interval 3915 (1957000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.4149 5 episodes - episode_reward: -338.809 [-694.363, -100.000] - loss: 2101582.000 - mae: 27730.447 - mean_q: 37051.852 Interval 3916 (1957500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.2714 6 episodes - episode_reward: -96.271 [-164.244, -35.339] - loss: 2233749.500 - mae: 26736.430 - mean_q: 35710.035 Interval 3917 (1958000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1733 5 episodes - episode_reward: -243.617 [-544.534, -107.496] - loss: 1965132.125 - mae: 25482.350 - mean_q: 34064.031 Interval 3918 (1958500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.9497 3 episodes - episode_reward: -275.443 [-484.206, -162.414] - loss: 2000249.125 - mae: 24788.018 - mean_q: 33175.309 Interval 3919 (1959000 steps 
performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.8005 5 episodes - episode_reward: -185.605 [-392.497, -78.393] - loss: 1886372.125 - mae: 24324.387 - mean_q: 32567.420 Interval 3920 (1959500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3227 3 episodes - episode_reward: -113.029 [-191.456, -53.541] - loss: 2095878.000 - mae: 24038.285 - mean_q: 32236.633 Interval 3921 (1960000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.1970 4 episodes - episode_reward: -482.891 [-688.737, -102.667] - loss: 2022490.250 - mae: 23289.869 - mean_q: 31315.184 Interval 3922 (1960500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -0.9018 3 episodes - episode_reward: -143.443 [-262.901, -22.249] - loss: 2290386.000 - mae: 23100.900 - mean_q: 31101.213 Interval 3923 (1961000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.9473 3 episodes - episode_reward: -339.324 [-452.060, -262.501] - loss: 2008618.750 - mae: 22997.578 - mean_q: 31045.068 Interval 3924 (1961500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.4941 3 episodes - episode_reward: -418.301 [-573.898, -131.580] - loss: 2509362.250 - mae: 23246.406 - mean_q: 31432.193 Interval 3925 (1962000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.1308 2 episodes - episode_reward: -271.374 [-433.872, -108.877] - loss: 2394502.000 - mae: 23253.312 - mean_q: 31494.803 Interval 3926 (1962500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.5074 1 episodes - episode_reward: -724.628 [-724.628, -724.628] - loss: 2438370.000 - mae: 23741.643 - mean_q: 32200.586 Interval 3927 (1963000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.1441 2 episodes - episode_reward: -318.850 [-485.038, -152.661] - loss: 2523222.000 - mae: 
24039.543 - mean_q: 32553.025 Interval 3928 (1963500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -0.4334 Interval 3929 (1964000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.4132 3 episodes - episode_reward: -497.372 [-671.117, -390.159] - loss: 2680134.000 - mae: 23387.629 - mean_q: 31488.730 Interval 3930 (1964500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.6582 3 episodes - episode_reward: -366.965 [-673.363, 7.328] - loss: 2635583.500 - mae: 22879.119 - mean_q: 30786.773 Interval 3931 (1965000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.5523 2 episodes - episode_reward: -623.673 [-635.768, -611.577] - loss: 2373828.250 - mae: 22654.047 - mean_q: 30520.953 Interval 3932 (1965500 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -2.6403 3 episodes - episode_reward: -372.206 [-490.179, -237.479] - loss: 2367632.250 - mae: 22426.107 - mean_q: 30234.729 Interval 3933 (1966000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1061 4 episodes - episode_reward: -428.246 [-478.721, -393.323] - loss: 2506260.000 - mae: 22236.557 - mean_q: 29985.119 Interval 3934 (1966500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.3241 5 episodes - episode_reward: -274.949 [-430.043, -91.830] - loss: 3039063.500 - mae: 22230.574 - mean_q: 29945.072 Interval 3935 (1967000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.5799 5 episodes - episode_reward: -446.647 [-591.346, -217.190] - loss: 2525572.750 - mae: 22079.725 - mean_q: 29711.027 Interval 3936 (1967500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9212 4 episodes - episode_reward: -329.133 [-406.283, -188.758] - loss: 2830541.250 - mae: 21430.002 - mean_q: 28796.729 Interval 3937 (1968000 steps 
performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.9843 4 episodes - episode_reward: -425.003 [-506.752, -291.324] - loss: 2309327.250 - mae: 20570.295 - mean_q: 27619.363 Interval 3938 (1968500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.1233 3 episodes - episode_reward: -338.008 [-640.117, -173.109] - loss: 2016209.875 - mae: 20176.053 - mean_q: 27136.398 Interval 3939 (1969000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0552 2 episodes - episode_reward: -302.540 [-303.901, -301.179] - loss: 2240692.250 - mae: 20160.756 - mean_q: 27115.516 Interval 3940 (1969500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4460 1 episodes - episode_reward: -1217.280 [-1217.280, -1217.280] - loss: 2121755.250 - mae: 19975.557 - mean_q: 26863.189 Interval 3941 (1970000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.2755 3 episodes - episode_reward: -1031.151 [-1384.774, -459.956] - loss: 2398615.750 - mae: 19796.816 - mean_q: 26603.701 Interval 3942 (1970500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0120 2 episodes - episode_reward: -677.577 [-1035.039, -320.115] - loss: 2276055.750 - mae: 19706.668 - mean_q: 26484.971 Interval 3943 (1971000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.6464 3 episodes - episode_reward: -579.100 [-1104.373, -166.526] - loss: 2711152.500 - mae: 19531.768 - mean_q: 26255.100 Interval 3944 (1971500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.8392 3 episodes - episode_reward: -503.490 [-704.331, -380.600] - loss: 2533149.750 - mae: 19247.518 - mean_q: 25850.424 Interval 3945 (1972000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.4252 4 episodes - episode_reward: -556.453 [-894.437, -100.000] - loss: 
2818819.500 - mae: 19435.727 - mean_q: 26071.549 Interval 3946 (1972500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.0752 2 episodes - episode_reward: -662.681 [-995.763, -329.599] - loss: 2989243.750 - mae: 18898.457 - mean_q: 25295.635 Interval 3947 (1973000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.6843 6 episodes - episode_reward: -493.064 [-1047.729, -197.613] - loss: 2558192.250 - mae: 19024.072 - mean_q: 25508.521 Interval 3948 (1973500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.9475 2 episodes - episode_reward: -751.874 [-813.349, -690.399] - loss: 2628743.500 - mae: 18785.490 - mean_q: 25185.285 Interval 3949 (1974000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.2821 2 episodes - episode_reward: -743.090 [-845.443, -640.737] - loss: 2532508.750 - mae: 19143.932 - mean_q: 25696.965 Interval 3950 (1974500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -2.1568 2 episodes - episode_reward: -601.799 [-969.617, -233.982] - loss: 2710874.250 - mae: 19125.695 - mean_q: 25685.549 Interval 3951 (1975000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1376 3 episodes - episode_reward: -582.639 [-858.741, -155.002] - loss: 2497075.750 - mae: 18640.572 - mean_q: 25055.293 Interval 3952 (1975500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.1691 2 episodes - episode_reward: -797.885 [-921.667, -674.102] - loss: 2623409.250 - mae: 18358.730 - mean_q: 24655.211 Interval 3953 (1976000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.3145 3 episodes - episode_reward: -539.260 [-732.867, -428.427] - loss: 3502033.750 - mae: 18814.881 - mean_q: 25292.404 Interval 3954 (1976500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.4028 3 
episodes - episode_reward: -720.687 [-851.515, -499.722] - loss: 3299025.250 - mae: 18348.416 - mean_q: 24655.596 Interval 3955 (1977000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.9453 3 episodes - episode_reward: -688.564 [-1155.847, -175.523] - loss: 2923747.750 - mae: 18340.689 - mean_q: 24688.906 Interval 3956 (1977500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.3014 2 episodes - episode_reward: -807.902 [-982.485, -633.319] - loss: 3500140.000 - mae: 18372.248 - mean_q: 24778.420 Interval 3957 (1978000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -5.4262 5 episodes - episode_reward: -558.013 [-1234.974, -108.405] - loss: 3657844.500 - mae: 18117.812 - mean_q: 24443.955 Interval 3958 (1978500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.7348 2 episodes - episode_reward: -826.970 [-982.674, -671.265] - loss: 2874371.250 - mae: 18636.557 - mean_q: 25177.086 Interval 3959 (1979000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.6392 5 episodes - episode_reward: -506.445 [-1002.751, -161.504] - loss: 3606230.000 - mae: 18547.037 - mean_q: 25105.279 Interval 3960 (1979500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -5.0786 4 episodes - episode_reward: -584.953 [-743.630, -255.659] - loss: 2622651.750 - mae: 18461.359 - mean_q: 25025.584 Interval 3961 (1980000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.4942 3 episodes - episode_reward: -488.651 [-729.266, -156.177] - loss: 3076631.250 - mae: 18742.990 - mean_q: 25376.814 Interval 3962 (1980500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.8401 5 episodes - episode_reward: -425.075 [-779.624, -100.000] - loss: 2784437.250 - mae: 18893.057 - mean_q: 25549.797 Interval 3963 (1981000 steps performed) 500/500 
[==============================] - 4s 8ms/step - reward: -3.9648 2 episodes - episode_reward: -1047.033 [-1134.632, -959.433] - loss: 2467862.750 - mae: 19025.010 - mean_q: 25757.730 Interval 3964 (1981500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.3264 5 episodes - episode_reward: -554.057 [-1206.792, -143.606] - loss: 2620413.000 - mae: 19110.246 - mean_q: 25908.193 Interval 3965 (1982000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.2799 4 episodes - episode_reward: -498.573 [-837.412, -80.603] - loss: 2591355.750 - mae: 19287.678 - mean_q: 26142.188 Interval 3966 (1982500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.5207 2 episodes - episode_reward: -906.174 [-998.430, -813.919] - loss: 2371576.250 - mae: 19112.324 - mean_q: 25900.963 Interval 3967 (1983000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.1042 4 episodes - episode_reward: -417.166 [-673.050, -157.182] - loss: 2547013.750 - mae: 18974.809 - mean_q: 25701.910 Interval 3968 (1983500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.3862 3 episodes - episode_reward: -453.685 [-676.021, -87.376] - loss: 2383600.750 - mae: 18936.750 - mean_q: 25691.613 Interval 3969 (1984000 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.4241 3 episodes - episode_reward: -594.408 [-811.613, -202.384] - loss: 2428737.500 - mae: 18785.373 - mean_q: 25456.123 Interval 3970 (1984500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.8855 4 episodes - episode_reward: -558.346 [-835.259, -114.750] - loss: 2337150.750 - mae: 18576.455 - mean_q: 25171.807 Interval 3971 (1985000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.3313 6 episodes - episode_reward: -247.652 [-702.094, -100.000] - loss: 2250811.500 - mae: 17961.818 - 
mean_q: 24339.236 Interval 3972 (1985500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -4.2634 5 episodes - episode_reward: -441.484 [-1123.797, -100.000] - loss: 2758052.250 - mae: 17755.738 - mean_q: 24008.967 Interval 3973 (1986000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.3722 5 episodes - episode_reward: -452.699 [-891.204, -100.862] - loss: 2530207.000 - mae: 17402.996 - mean_q: 23565.584 Interval 3974 (1986500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.0220 5 episodes - episode_reward: -305.938 [-714.610, -103.502] - loss: 2185143.250 - mae: 16689.586 - mean_q: 22574.766 Interval 3975 (1987000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.0420 2 episodes - episode_reward: -530.978 [-590.908, -471.048] - loss: 2262567.750 - mae: 16100.848 - mean_q: 21783.494 Interval 3976 (1987500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.1255 2 episodes - episode_reward: -1162.232 [-1404.188, -920.276] - loss: 1861648.000 - mae: 15444.831 - mean_q: 20842.832 Interval 3977 (1988000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.6443 5 episodes - episode_reward: -384.350 [-650.195, -156.686] - loss: 2167445.750 - mae: 14855.811 - mean_q: 20056.539 Interval 3978 (1988500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.2882 6 episodes - episode_reward: -448.116 [-671.059, -89.894] - loss: 1995523.625 - mae: 14219.877 - mean_q: 19178.566 Interval 3979 (1989000 steps performed) 500/500 [==============================] - 3s 7ms/step - reward: -4.0914 4 episodes - episode_reward: -476.849 [-670.440, -165.822] - loss: 1922776.875 - mae: 13688.632 - mean_q: 18453.104 Interval 3980 (1989500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.0777 4 episodes - episode_reward: 
-661.989 [-753.657, -495.144] - loss: 1943360.250 - mae: 13439.135 - mean_q: 18131.855 Interval 3981 (1990000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.1011 5 episodes - episode_reward: -389.492 [-703.242, -236.895] - loss: 1841945.875 - mae: 13252.136 - mean_q: 17905.008 Interval 3982 (1990500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.8784 4 episodes - episode_reward: -660.810 [-858.600, -156.643] - loss: 1846990.500 - mae: 12976.461 - mean_q: 17544.434 Interval 3983 (1991000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.2999 3 episodes - episode_reward: -489.101 [-709.331, -289.387] - loss: 1639217.750 - mae: 12984.386 - mean_q: 17567.748 Interval 3984 (1991500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.7356 3 episodes - episode_reward: -439.207 [-569.883, -233.064] - loss: 1772726.125 - mae: 12788.931 - mean_q: 17290.084 Interval 3985 (1992000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.5688 6 episodes - episode_reward: -381.037 [-842.220, -81.803] - loss: 1615377.375 - mae: 12523.144 - mean_q: 16952.576 Interval 3986 (1992500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -2.0252 2 episodes - episode_reward: -414.462 [-577.631, -251.292] - loss: 1478615.375 - mae: 12288.868 - mean_q: 16628.059 Interval 3987 (1993000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.0014 2 episodes - episode_reward: -1361.065 [-1982.044, -740.086] - loss: 1489159.625 - mae: 11931.988 - mean_q: 16128.506 Interval 3988 (1993500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.3269 6 episodes - episode_reward: -282.852 [-510.955, -100.000] - loss: 1481355.750 - mae: 11706.459 - mean_q: 15816.058 Interval 3989 (1994000 steps performed) 500/500 
[==============================] - 4s 7ms/step - reward: -4.2833 2 episodes - episode_reward: -1161.218 [-1318.162, -1004.274] - loss: 1321301.125 - mae: 11084.045 - mean_q: 14959.362 Interval 3990 (1994500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.2347 2 episodes - episode_reward: -797.415 [-1394.228, -200.601] - loss: 1325593.750 - mae: 10578.282 - mean_q: 14247.677 Interval 3991 (1995000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.5596 4 episodes - episode_reward: -692.436 [-1245.964, -330.935] - loss: 1317029.000 - mae: 10447.541 - mean_q: 14067.152 Interval 3992 (1995500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.5028 3 episodes - episode_reward: -583.475 [-1026.255, -189.444] - loss: 1230184.625 - mae: 9827.113 - mean_q: 13229.047 Interval 3993 (1996000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.5863 5 episodes - episode_reward: -418.434 [-776.939, -142.688] - loss: 1178628.625 - mae: 9365.071 - mean_q: 12604.801 Interval 3994 (1996500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -6.4620 4 episodes - episode_reward: -323.525 [-813.660, -149.768] - loss: 1047814.250 - mae: 8984.348 - mean_q: 12092.192 Interval 3995 (1997000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -4.4711 3 episodes - episode_reward: -1195.977 [-2977.501, -123.857] - loss: 1077278.125 - mae: 8518.796 - mean_q: 11455.770 Interval 3996 (1997500 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -5.8910 3 episodes - episode_reward: -1161.622 [-2740.984, -132.728] - loss: 1127997.000 - mae: 8131.399 - mean_q: 10928.322 Interval 3997 (1998000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -3.8406 3 episodes - episode_reward: -691.001 [-1444.765, -263.471] - loss: 1053658.500 - mae: 7990.007 - 
mean_q: 10743.387 Interval 3998 (1998500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -3.4840 2 episodes - episode_reward: -861.422 [-878.062, -844.782] - loss: 1123485.250 - mae: 7784.017 - mean_q: 10477.657 Interval 3999 (1999000 steps performed) 500/500 [==============================] - 4s 7ms/step - reward: -1.3835 2 episodes - episode_reward: -342.140 [-380.871, -303.409] - loss: 1159623.625 - mae: 7841.388 - mean_q: 10580.843 Interval 4000 (1999500 steps performed) 500/500 [==============================] - 4s 8ms/step - reward: -1.4734 done, took 13669.967 seconds
# Record the checkpoint file for this run and reload the trained agent's weights.
# NOTE(review): `models` is initialised as a dict in the file header but used with
# .append() here — presumably rebound to a list in an earlier cell; confirm.
weights.append('dqn_lunar_weights_six.h5f')  # no placeholders, so no f-string needed
models.append(dqn)
models[-1].load_weights(weights[-1])

# Plot raw per-episode reward (light gray) overlaid with a 50-episode
# rolling mean (black) to show the training trend.
df = pd.DataFrame(history.history)
ax = df['episode_reward'].plot(color='lightgray')
df['episode_reward'].rolling(50).mean().plot(color='black')
ax.set_xlabel("Episode")
# Fix: label previously said "(10)" but the rolling window above is 50.
plt.ylabel("Rolling Mean (50) Cumulative Return")
plt.show()
Again, the average hovers around zero, and we still see large negative episode rewards near the end of training.
# Persist the model-6 training history so the results survive this session.
# NOTE(review): filename has no .csv extension — intentional? confirm downstream readers.
df.to_csv('lunar_training_weights_6')
I felt that the next logical step was to increase memory size, hoping that this would allow the model to better capture some of the complexities of the game. I also increased the log interval, as this felt like a sensible step to take when increasing the sequential memory limit.
# Record Model 7's hyper-parameters: window length 6, 128/64/32 hidden units,
# 250k training steps, 100k replay-memory limit, log interval 1500.
rl['Model 7'] = [6, '128/64/32', 250000, 100000, 1500, None]
# Q-network: flatten the stacked 6-step observation window, pass it through
# three ReLU hidden layers of decreasing width, and emit one linear Q-value
# per available action.
model = Sequential()
model.add(Flatten(input_shape=(6,) + env.observation_space.shape))
for width in (128, 64, 32):
    model.add(Dense(width))
    model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
# Replay buffer enlarged to 100k transitions (up from the previous model),
# stacking 6 consecutive observations per state.
memory = SequentialMemory(limit=100000, window_length=6)
# Epsilon-greedy exploration over the network's Q-values.
policy = EpsGreedyQPolicy()
# DQN agent: brief 30-step warm-up before learning starts; target_model_update
# < 1 means soft (Polyak) target-network updates with tau = 1e-2.
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=30,
target_model_update=1e-2, policy=policy)
# MAE tracked alongside the loss during training.
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
# Output files: final weights, per-interval checkpoints, and the JSON step log.
weights_filename = f'dqn_lunar_weights_seven.h5f'
checkpoint_weights_filename = 'dqn_lunar_weights_{step}.h5f'
log_filename = f'dqn_lunar_log.json'
# Checkpoint every 250k steps (i.e. effectively only at the end of this run)
# and write the file log every 100 steps.
callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
callbacks += [FileLogger(log_filename, interval=100)]
# Train for 250k steps, timing the run; log_interval raised to 1500 to go
# with the larger sequential memory (see narrative above this cell).
start_time = time.time()
history = dqn.fit(env, callbacks=callbacks, nb_steps=250000, log_interval=1500)
end_time = time.time()
Training for 250000 steps ... Interval 1 (0 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: -0.1601 1 episodes - episode_reward: -66.539 [-66.539, -66.539] - loss: 1.291 - mae: 38.404 - mean_q: 49.186 Interval 2 (1500 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: -0.1595 1 episodes - episode_reward: -305.485 [-305.485, -305.485] - loss: 1.495 - mae: 23.712 - mean_q: 30.829 Interval 3 (3000 steps performed) 1500/1500 [==============================] - 21s 14ms/step - reward: -0.1487 Interval 4 (4500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: -0.0162 1 episodes - episode_reward: -222.142 [-222.142, -222.142] - loss: 2.277 - mae: 22.448 - mean_q: 30.203 Interval 5 (6000 steps performed) 1500/1500 [==============================] - 16s 11ms/step - reward: -0.1745 Interval 6 (7500 steps performed) 1500/1500 [==============================] - 18s 12ms/step - reward: -0.1661 Interval 7 (9000 steps performed) 1500/1500 [==============================] - 19s 13ms/step - reward: -0.3848 2 episodes - episode_reward: -615.642 [-723.175, -508.109] - loss: 1.154 - mae: 17.161 - mean_q: 23.206 Interval 8 (10500 steps performed) 1500/1500 [==============================] - 11s 7ms/step - reward: -0.9510 5 episodes - episode_reward: -286.132 [-552.793, -100.000] - loss: 4.476 - mae: 18.798 - mean_q: 25.073 Interval 9 (12000 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: -0.4834 2 episodes - episode_reward: -325.060 [-325.763, -324.357] - loss: 4.361 - mae: 21.504 - mean_q: 28.057 Interval 10 (13500 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: -0.3445 3 episodes - episode_reward: -136.207 [-462.108, 39.115] - loss: 4.080 - mae: 24.246 - mean_q: 30.221 Interval 11 (15000 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: -0.6982 5 episodes - episode_reward: 
-236.363 [-415.955, 6.597] - loss: 5.502 - mae: 25.021 - mean_q: 29.989 Interval 12 (16500 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: -0.2617 1 episodes - episode_reward: -309.756 [-309.756, -309.756] - loss: 5.479 - mae: 26.091 - mean_q: 30.175 Interval 13 (18000 steps performed) 1500/1500 [==============================] - 13s 8ms/step - reward: 0.0703 1 episodes - episode_reward: 44.324 [44.324, 44.324] - loss: 3.893 - mae: 25.475 - mean_q: 30.210 Interval 14 (19500 steps performed) 1500/1500 [==============================] - 17s 12ms/step - reward: 0.0131 1 episodes - episode_reward: -37.191 [-37.191, -37.191] - loss: 4.069 - mae: 26.072 - mean_q: 31.320 Interval 15 (21000 steps performed) 1500/1500 [==============================] - 11s 8ms/step - reward: -0.1848 3 episodes - episode_reward: -102.956 [-116.324, -77.114] - loss: 3.966 - mae: 26.195 - mean_q: 31.791 Interval 16 (22500 steps performed) 1500/1500 [==============================] - 13s 9ms/step - reward: -0.1239 3 episodes - episode_reward: -52.316 [-140.916, 108.062] - loss: 5.518 - mae: 26.445 - mean_q: 32.681 Interval 17 (24000 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: -0.0037 2 episodes - episode_reward: 7.928 [-82.264, 98.120] - loss: 3.805 - mae: 26.861 - mean_q: 33.667 Interval 18 (25500 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: -0.1124 1 episodes - episode_reward: -176.875 [-176.875, -176.875] - loss: 4.490 - mae: 28.942 - mean_q: 35.823 Interval 19 (27000 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: 0.1526 2 episodes - episode_reward: 126.332 [104.099, 148.564] - loss: 4.382 - mae: 31.540 - mean_q: 39.060 Interval 20 (28500 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.0069 1 episodes - episode_reward: 69.027 [69.027, 69.027] - loss: 3.997 - mae: 30.680 - mean_q: 38.015 Interval 21 
(30000 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: 0.1038 2 episodes - episode_reward: 38.358 [-9.425, 86.140] - loss: 4.068 - mae: 30.214 - mean_q: 37.317 Interval 22 (31500 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: 0.0086 1 episodes - episode_reward: 66.582 [66.582, 66.582] - loss: 2.916 - mae: 29.702 - mean_q: 36.349 Interval 23 (33000 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: 0.1768 2 episodes - episode_reward: 43.365 [-41.301, 128.030] - loss: 3.515 - mae: 30.630 - mean_q: 37.838 Interval 24 (34500 steps performed) 1500/1500 [==============================] - 12s 8ms/step - reward: -0.4231 6 episodes - episode_reward: -72.439 [-141.912, 208.114] - loss: 5.725 - mae: 30.972 - mean_q: 38.446 Interval 25 (36000 steps performed) 1500/1500 [==============================] - 13s 8ms/step - reward: -0.1413 2 episodes - episode_reward: -114.566 [-415.014, 185.882] - loss: 4.901 - mae: 30.729 - mean_q: 37.634 Interval 26 (37500 steps performed) 1500/1500 [==============================] - 13s 8ms/step - reward: 0.1617 3 episodes - episode_reward: 59.376 [-92.262, 229.364] - loss: 4.412 - mae: 30.506 - mean_q: 37.416 Interval 27 (39000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.0784 1 episodes - episode_reward: 84.470 [84.470, 84.470] - loss: 4.696 - mae: 30.463 - mean_q: 37.713 Interval 28 (40500 steps performed) 1500/1500 [==============================] - 13s 9ms/step - reward: 0.0382 1 episodes - episode_reward: 64.778 [64.778, 64.778] - loss: 4.659 - mae: 30.495 - mean_q: 38.186 Interval 29 (42000 steps performed) 1500/1500 [==============================] - 13s 9ms/step - reward: -0.0530 2 episodes - episode_reward: 12.216 [-96.928, 121.360] - loss: 3.845 - mae: 30.471 - mean_q: 38.011 Interval 30 (43500 steps performed) 1500/1500 [==============================] - 13s 8ms/step - reward: -0.0020 
2 episodes - episode_reward: -63.658 [-266.800, 139.484] - loss: 4.962 - mae: 30.366 - mean_q: 38.113 Interval 31 (45000 steps performed) 1500/1500 [==============================] - 13s 9ms/step - reward: 0.1311 1 episodes - episode_reward: 186.087 [186.087, 186.087] - loss: 4.093 - mae: 30.142 - mean_q: 38.123 Interval 32 (46500 steps performed) 1500/1500 [==============================] - 13s 9ms/step - reward: 0.0799 2 episodes - episode_reward: 100.987 [53.762, 148.213] - loss: 4.239 - mae: 29.983 - mean_q: 37.830 Interval 33 (48000 steps performed) 1500/1500 [==============================] - 13s 8ms/step - reward: -0.5174 6 episodes - episode_reward: -123.114 [-212.281, -87.206] - loss: 4.591 - mae: 29.639 - mean_q: 37.503 Interval 34 (49500 steps performed) 1500/1500 [==============================] - 16s 11ms/step - reward: -0.1708 Interval 35 (51000 steps performed) 1500/1500 [==============================] - 19s 13ms/step - reward: -0.1774 Interval 36 (52500 steps performed) 1500/1500 [==============================] - 22s 15ms/step - reward: -0.0960 Interval 37 (54000 steps performed) 1500/1500 [==============================] - 18s 12ms/step - reward: 0.0501 1 episodes - episode_reward: -669.420 [-669.420, -669.420] - loss: 4.313 - mae: 30.118 - mean_q: 38.341 Interval 38 (55500 steps performed) 1500/1500 [==============================] - 18s 12ms/step - reward: -0.1186 Interval 39 (57000 steps performed) 1500/1500 [==============================] - 13s 9ms/step - reward: 0.2653 4 episodes - episode_reward: 51.739 [-68.685, 179.116] - loss: 5.421 - mae: 30.466 - mean_q: 38.577 Interval 40 (58500 steps performed) 1500/1500 [==============================] - 13s 8ms/step - reward: -0.0799 3 episodes - episode_reward: -30.447 [-139.248, 162.880] - loss: 5.265 - mae: 30.434 - mean_q: 38.777 Interval 41 (60000 steps performed) 1500/1500 [==============================] - 13s 8ms/step - reward: 0.2033 3 episodes - episode_reward: 64.953 [-44.410, 242.466] 
- loss: 4.950 - mae: 30.427 - mean_q: 38.813 Interval 42 (61500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: -0.1418 1 episodes - episode_reward: -117.163 [-117.163, -117.163] - loss: 5.717 - mae: 30.659 - mean_q: 39.268 Interval 43 (63000 steps performed) 1500/1500 [==============================] - 13s 8ms/step - reward: -0.1517 4 episodes - episode_reward: -48.660 [-136.614, 114.624] - loss: 4.447 - mae: 30.945 - mean_q: 39.568 Interval 44 (64500 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: -0.1007 Interval 45 (66000 steps performed) 1500/1500 [==============================] - 13s 9ms/step - reward: 0.2642 3 episodes - episode_reward: 60.731 [-280.270, 252.201] - loss: 5.128 - mae: 31.266 - mean_q: 39.874 Interval 46 (67500 steps performed) 1500/1500 [==============================] - 13s 9ms/step - reward: 0.2283 2 episodes - episode_reward: 155.116 [84.471, 225.761] - loss: 4.002 - mae: 31.275 - mean_q: 40.250 Interval 47 (69000 steps performed) 1500/1500 [==============================] - 13s 9ms/step - reward: 0.2108 3 episodes - episode_reward: 99.880 [-152.642, 248.682] - loss: 4.664 - mae: 31.196 - mean_q: 40.045 Interval 48 (70500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: -0.0466 2 episodes - episode_reward: 17.082 [-132.103, 166.266] - loss: 4.889 - mae: 31.132 - mean_q: 39.951 Interval 49 (72000 steps performed) 1500/1500 [==============================] - 16s 11ms/step - reward: -0.0098 Interval 50 (73500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.1807 3 episodes - episode_reward: 42.778 [-107.891, 182.750] - loss: 4.789 - mae: 31.032 - mean_q: 40.428 Interval 51 (75000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.0967 2 episodes - episode_reward: 137.149 [120.242, 154.056] - loss: 5.840 - mae: 30.799 - mean_q: 40.510 Interval 52 (76500 steps 
performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.2626 2 episodes - episode_reward: 180.096 [178.539, 181.654] - loss: 5.086 - mae: 30.745 - mean_q: 40.320 Interval 53 (78000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.2391 2 episodes - episode_reward: 164.895 [162.796, 166.994] - loss: 5.947 - mae: 30.490 - mean_q: 39.508 Interval 54 (79500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.0944 1 episodes - episode_reward: 105.804 [105.804, 105.804] - loss: 3.912 - mae: 30.225 - mean_q: 39.103 Interval 55 (81000 steps performed) 1500/1500 [==============================] - 16s 11ms/step - reward: 0.0455 1 episodes - episode_reward: 162.051 [162.051, 162.051] - loss: 4.676 - mae: 30.035 - mean_q: 38.945 Interval 56 (82500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.2645 2 episodes - episode_reward: 151.998 [79.605, 224.392] - loss: 5.101 - mae: 29.955 - mean_q: 39.010 Interval 57 (84000 steps performed) 1500/1500 [==============================] - 13s 9ms/step - reward: 0.2194 7 episodes - episode_reward: 52.046 [-142.518, 276.310] - loss: 4.029 - mae: 30.403 - mean_q: 39.476 Interval 58 (85500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.1634 3 episodes - episode_reward: 60.750 [-297.043, 265.628] - loss: 5.343 - mae: 30.807 - mean_q: 40.106 Interval 59 (87000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3832 4 episodes - episode_reward: 154.811 [-91.275, 260.627] - loss: 5.830 - mae: 31.288 - mean_q: 40.599 Interval 60 (88500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.2200 2 episodes - episode_reward: 146.201 [84.798, 207.604] - loss: 5.291 - mae: 31.289 - mean_q: 40.835 Interval 61 (90000 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.0310 4 
episodes - episode_reward: 37.938 [-193.335, 249.070] - loss: 5.130 - mae: 32.048 - mean_q: 41.413 Interval 62 (91500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.2746 2 episodes - episode_reward: 187.438 [155.284, 219.592] - loss: 5.562 - mae: 32.258 - mean_q: 41.600 Interval 63 (93000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.0395 2 episodes - episode_reward: 21.308 [-102.817, 145.433] - loss: 7.169 - mae: 33.249 - mean_q: 43.084 Interval 64 (94500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.2666 2 episodes - episode_reward: 172.357 [127.449, 217.265] - loss: 6.333 - mae: 34.201 - mean_q: 44.515 Interval 65 (96000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.1947 5 episodes - episode_reward: 82.356 [-150.389, 264.440] - loss: 4.993 - mae: 35.504 - mean_q: 46.784 Interval 66 (97500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: -0.1196 3 episodes - episode_reward: -85.303 [-240.354, 101.317] - loss: 5.126 - mae: 36.793 - mean_q: 48.373 Interval 67 (99000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.3820 3 episodes - episode_reward: 209.699 [171.447, 240.006] - loss: 7.110 - mae: 37.554 - mean_q: 49.247 Interval 68 (100500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.1049 1 episodes - episode_reward: 232.128 [232.128, 232.128] - loss: 5.298 - mae: 37.678 - mean_q: 49.445 Interval 69 (102000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.1489 5 episodes - episode_reward: 15.903 [-118.738, 261.163] - loss: 4.838 - mae: 37.245 - mean_q: 48.849 Interval 70 (103500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.5285 3 episodes - episode_reward: 261.874 [230.997, 312.622] - loss: 6.973 - mae: 
37.202 - mean_q: 48.477 Interval 71 (105000 steps performed) 1500/1500 [==============================] - 17s 11ms/step - reward: 0.0964 3 episodes - episode_reward: 65.948 [-145.573, 251.816] - loss: 4.925 - mae: 37.151 - mean_q: 48.468 Interval 72 (106500 steps performed) 1500/1500 [==============================] - 16s 10ms/step - reward: 0.2863 2 episodes - episode_reward: 207.677 [182.384, 232.969] - loss: 5.337 - mae: 36.898 - mean_q: 47.978 Interval 73 (108000 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.4017 2 episodes - episode_reward: 265.366 [264.569, 266.163] - loss: 4.958 - mae: 36.929 - mean_q: 48.037 Interval 74 (109500 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.2039 3 episodes - episode_reward: 102.168 [-144.439, 227.537] - loss: 6.548 - mae: 37.090 - mean_q: 48.081 Interval 75 (111000 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.3862 7 episodes - episode_reward: 87.802 [-115.956, 229.387] - loss: 5.924 - mae: 36.741 - mean_q: 47.731 Interval 76 (112500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3112 4 episodes - episode_reward: 123.564 [-156.114, 226.703] - loss: 6.505 - mae: 37.054 - mean_q: 48.864 Interval 77 (114000 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.4525 4 episodes - episode_reward: 158.722 [-110.168, 298.122] - loss: 5.576 - mae: 38.305 - mean_q: 50.522 Interval 78 (115500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4326 4 episodes - episode_reward: 195.417 [175.781, 222.364] - loss: 5.878 - mae: 38.713 - mean_q: 51.240 Interval 79 (117000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.2095 3 episodes - episode_reward: 83.085 [-184.183, 255.479] - loss: 5.461 - mae: 39.472 - mean_q: 52.250 Interval 80 (118500 steps performed) 1500/1500 
[==============================] - 14s 10ms/step - reward: 0.2780 4 episodes - episode_reward: 114.733 [-99.855, 216.467] - loss: 7.215 - mae: 39.794 - mean_q: 52.665 Interval 81 (120000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.1434 3 episodes - episode_reward: 63.412 [-206.030, 232.246] - loss: 6.628 - mae: 39.994 - mean_q: 52.768 Interval 82 (121500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.1548 3 episodes - episode_reward: 80.126 [-191.951, 296.569] - loss: 5.429 - mae: 40.003 - mean_q: 53.002 Interval 83 (123000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: -0.2667 3 episodes - episode_reward: -124.595 [-398.770, 208.067] - loss: 5.526 - mae: 40.381 - mean_q: 53.328 Interval 84 (124500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.3204 4 episodes - episode_reward: 121.217 [-183.026, 276.716] - loss: 5.453 - mae: 40.535 - mean_q: 53.461 Interval 85 (126000 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.4250 4 episodes - episode_reward: 131.041 [-215.102, 293.599] - loss: 6.822 - mae: 40.323 - mean_q: 53.213 Interval 86 (127500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3920 3 episodes - episode_reward: 232.722 [187.415, 258.685] - loss: 6.877 - mae: 40.285 - mean_q: 53.041 Interval 87 (129000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.2389 2 episodes - episode_reward: 157.911 [134.447, 181.375] - loss: 6.146 - mae: 40.242 - mean_q: 53.071 Interval 88 (130500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: -0.0553 6 episodes - episode_reward: -27.223 [-121.338, 269.417] - loss: 7.010 - mae: 40.410 - mean_q: 53.192 Interval 89 (132000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.0654 5 
episodes - episode_reward: 9.386 [-138.585, 249.179] - loss: 7.589 - mae: 40.371 - mean_q: 53.134 Interval 90 (133500 steps performed) 1500/1500 [==============================] - 16s 10ms/step - reward: 0.6119 4 episodes - episode_reward: 265.948 [215.252, 323.898] - loss: 7.607 - mae: 40.163 - mean_q: 52.963 Interval 91 (135000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.0311 7 episodes - episode_reward: 14.691 [-240.664, 233.778] - loss: 6.010 - mae: 39.975 - mean_q: 52.569 Interval 92 (136500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.5394 5 episodes - episode_reward: 116.861 [-100.000, 269.347] - loss: 8.073 - mae: 40.336 - mean_q: 53.191 Interval 93 (138000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.2335 2 episodes - episode_reward: 262.950 [256.297, 269.602] - loss: 6.650 - mae: 40.402 - mean_q: 53.217 Interval 94 (139500 steps performed) 1500/1500 [==============================] - 16s 11ms/step - reward: 0.3336 2 episodes - episode_reward: 191.428 [123.362, 259.494] - loss: 6.879 - mae: 40.705 - mean_q: 53.515 Interval 95 (141000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.7268 6 episodes - episode_reward: 194.554 [-100.000, 321.145] - loss: 6.441 - mae: 41.366 - mean_q: 54.743 Interval 96 (142500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.2205 2 episodes - episode_reward: 155.067 [75.072, 235.063] - loss: 7.180 - mae: 41.636 - mean_q: 55.123 Interval 97 (144000 steps performed) 1500/1500 [==============================] - 16s 11ms/step - reward: 0.2270 2 episodes - episode_reward: 191.143 [124.335, 257.950] - loss: 7.566 - mae: 42.262 - mean_q: 55.895 Interval 98 (145500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.3327 3 episodes - episode_reward: 98.756 [-135.124, 300.906] - loss: 7.872 - 
mae: 42.317 - mean_q: 55.999 Interval 99 (147000 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.2095 5 episodes - episode_reward: 92.549 [-157.133, 315.414] - loss: 6.866 - mae: 43.172 - mean_q: 56.987 Interval 100 (148500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.0576 4 episodes - episode_reward: -4.825 [-244.445, 206.979] - loss: 6.636 - mae: 43.657 - mean_q: 57.523 Interval 101 (150000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4267 6 episodes - episode_reward: 112.047 [-200.513, 333.043] - loss: 7.872 - mae: 43.730 - mean_q: 57.493 Interval 102 (151500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4313 7 episodes - episode_reward: 92.731 [-163.493, 294.236] - loss: 8.244 - mae: 44.737 - mean_q: 58.714 Interval 103 (153000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.2351 8 episodes - episode_reward: 51.177 [-356.712, 290.734] - loss: 7.462 - mae: 45.751 - mean_q: 59.838 Interval 104 (154500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.5420 4 episodes - episode_reward: 252.596 [242.305, 264.687] - loss: 7.340 - mae: 46.633 - mean_q: 60.836 Interval 105 (156000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4092 6 episodes - episode_reward: 85.251 [-203.633, 310.813] - loss: 8.573 - mae: 47.133 - mean_q: 61.410 Interval 106 (157500 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.4855 5 episodes - episode_reward: 137.639 [-264.752, 259.537] - loss: 7.360 - mae: 47.067 - mean_q: 61.613 Interval 107 (159000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.4787 3 episodes - episode_reward: 247.458 [235.097, 266.665] - loss: 7.270 - mae: 46.910 - mean_q: 61.283 Interval 108 (160500 steps performed) 
1500/1500 [==============================] - 16s 11ms/step - reward: 0.1308 2 episodes - episode_reward: 76.224 [51.230, 101.219] - loss: 7.856 - mae: 46.907 - mean_q: 61.089 Interval 109 (162000 steps performed) 1500/1500 [==============================] - 16s 10ms/step - reward: 0.3134 2 episodes - episode_reward: 176.628 [125.663, 227.593] - loss: 7.868 - mae: 46.841 - mean_q: 60.701 Interval 110 (163500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3141 5 episodes - episode_reward: 127.180 [8.596, 261.536] - loss: 8.439 - mae: 46.793 - mean_q: 60.636 Interval 111 (165000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.6165 5 episodes - episode_reward: 185.032 [-9.957, 273.788] - loss: 8.503 - mae: 47.158 - mean_q: 61.409 Interval 112 (166500 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.1591 4 episodes - episode_reward: 62.717 [-100.000, 232.935] - loss: 8.324 - mae: 47.710 - mean_q: 61.925 Interval 113 (168000 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.2939 3 episodes - episode_reward: 92.026 [-99.640, 230.318] - loss: 7.589 - mae: 48.049 - mean_q: 62.409 Interval 114 (169500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4063 6 episodes - episode_reward: 108.267 [-98.463, 312.205] - loss: 7.967 - mae: 48.348 - mean_q: 62.764 Interval 115 (171000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3164 4 episodes - episode_reward: 136.496 [-178.274, 284.957] - loss: 8.422 - mae: 49.308 - mean_q: 64.208 Interval 116 (172500 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.4933 3 episodes - episode_reward: 226.353 [154.664, 276.678] - loss: 8.789 - mae: 50.517 - mean_q: 65.861 Interval 117 (174000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 
0.4519 5 episodes - episode_reward: 118.345 [-117.457, 320.768] - loss: 8.839 - mae: 50.771 - mean_q: 66.421 Interval 118 (175500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.0061 5 episodes - episode_reward: 30.799 [-185.604, 285.214] - loss: 8.126 - mae: 50.812 - mean_q: 66.610 Interval 119 (177000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4282 4 episodes - episode_reward: 128.737 [-133.174, 234.419] - loss: 10.439 - mae: 51.145 - mean_q: 67.223 Interval 120 (178500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.4818 4 episodes - episode_reward: 178.326 [-48.256, 268.607] - loss: 8.197 - mae: 51.462 - mean_q: 67.512 Interval 121 (180000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.5677 4 episodes - episode_reward: 209.215 [6.962, 323.185] - loss: 8.752 - mae: 51.876 - mean_q: 67.846 Interval 122 (181500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.4076 3 episodes - episode_reward: 226.016 [179.061, 304.124] - loss: 8.626 - mae: 52.732 - mean_q: 68.826 Interval 123 (183000 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.2832 7 episodes - episode_reward: 76.682 [-127.323, 306.272] - loss: 11.072 - mae: 52.914 - mean_q: 68.977 Interval 124 (184500 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.2627 4 episodes - episode_reward: 84.514 [-118.146, 289.061] - loss: 8.632 - mae: 52.640 - mean_q: 68.575 Interval 125 (186000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3710 4 episodes - episode_reward: 142.301 [-166.638, 279.946] - loss: 9.993 - mae: 51.936 - mean_q: 67.639 Interval 126 (187500 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.2793 2 episodes - episode_reward: 200.103 [157.397, 242.809] - 
loss: 9.646 - mae: 51.679 - mean_q: 67.239 Interval 127 (189000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.2161 6 episodes - episode_reward: 69.907 [-405.842, 273.214] - loss: 9.634 - mae: 51.697 - mean_q: 67.477 Interval 128 (190500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4999 5 episodes - episode_reward: 128.287 [-164.219, 288.710] - loss: 8.081 - mae: 51.774 - mean_q: 67.647 Interval 129 (192000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.2533 5 episodes - episode_reward: 94.864 [-80.961, 216.038] - loss: 8.145 - mae: 51.588 - mean_q: 67.100 Interval 130 (193500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3384 8 episodes - episode_reward: 41.639 [-119.425, 281.931] - loss: 9.494 - mae: 51.350 - mean_q: 66.915 Interval 131 (195000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.6568 6 episodes - episode_reward: 172.001 [-65.386, 296.014] - loss: 10.027 - mae: 51.336 - mean_q: 66.817 Interval 132 (196500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.1228 7 episodes - episode_reward: 31.472 [-173.123, 222.212] - loss: 7.997 - mae: 51.364 - mean_q: 67.162 Interval 133 (198000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.6316 4 episodes - episode_reward: 258.233 [243.790, 281.593] - loss: 10.680 - mae: 51.598 - mean_q: 67.736 Interval 134 (199500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4089 4 episodes - episode_reward: 107.284 [-114.420, 263.749] - loss: 10.709 - mae: 51.200 - mean_q: 67.052 Interval 135 (201000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.1714 6 episodes - episode_reward: 63.886 [-175.356, 299.039] - loss: 8.352 - mae: 51.561 - mean_q: 67.447 Interval 136 (202500 steps 
performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.4975 3 episodes - episode_reward: 219.892 [206.203, 241.286] - loss: 11.190 - mae: 51.768 - mean_q: 67.757 Interval 137 (204000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4390 7 episodes - episode_reward: 118.369 [-125.198, 303.595] - loss: 8.921 - mae: 51.938 - mean_q: 67.864 Interval 138 (205500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.0164 5 episodes - episode_reward: -28.312 [-287.195, 259.387] - loss: 10.310 - mae: 51.708 - mean_q: 67.441 Interval 139 (207000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3129 6 episodes - episode_reward: 109.074 [-100.000, 280.402] - loss: 10.504 - mae: 52.216 - mean_q: 67.937 Interval 140 (208500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4148 4 episodes - episode_reward: 160.381 [-103.643, 317.814] - loss: 10.085 - mae: 52.246 - mean_q: 67.942 Interval 141 (210000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3691 5 episodes - episode_reward: 80.009 [-187.578, 262.392] - loss: 10.425 - mae: 52.362 - mean_q: 68.338 Interval 142 (211500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.3210 2 episodes - episode_reward: 218.916 [135.613, 302.219] - loss: 12.202 - mae: 52.490 - mean_q: 68.104 Interval 143 (213000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3436 6 episodes - episode_reward: 108.537 [-230.516, 321.262] - loss: 12.361 - mae: 52.166 - mean_q: 67.479 Interval 144 (214500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4109 5 episodes - episode_reward: 79.653 [-146.202, 227.507] - loss: 9.999 - mae: 52.021 - mean_q: 67.060 Interval 145 (216000 steps performed) 1500/1500 [==============================] - 14s 
9ms/step - reward: 0.4711 6 episodes - episode_reward: 144.262 [-143.949, 326.072] - loss: 9.921 - mae: 52.278 - mean_q: 67.322 Interval 146 (217500 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.1693 3 episodes - episode_reward: 85.744 [-130.208, 231.964] - loss: 9.463 - mae: 52.323 - mean_q: 67.150 Interval 147 (219000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.5572 4 episodes - episode_reward: 222.238 [153.101, 246.576] - loss: 10.917 - mae: 52.417 - mean_q: 67.542 Interval 148 (220500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.0668 1 episodes - episode_reward: 49.837 [49.837, 49.837] - loss: 9.434 - mae: 52.212 - mean_q: 67.609 Interval 149 (222000 steps performed) 1500/1500 [==============================] - 16s 11ms/step - reward: 0.1484 1 episodes - episode_reward: 133.430 [133.430, 133.430] - loss: 10.850 - mae: 51.842 - mean_q: 67.476 Interval 150 (223500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.5450 3 episodes - episode_reward: 261.626 [241.307, 273.697] - loss: 9.432 - mae: 51.861 - mean_q: 67.731 Interval 151 (225000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4847 4 episodes - episode_reward: 211.112 [37.059, 326.767] - loss: 10.200 - mae: 51.584 - mean_q: 67.243 Interval 152 (226500 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.2487 2 episodes - episode_reward: 214.272 [173.716, 254.829] - loss: 9.730 - mae: 51.348 - mean_q: 67.020 Interval 153 (228000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4615 3 episodes - episode_reward: 202.271 [112.357, 283.000] - loss: 9.408 - mae: 51.693 - mean_q: 67.539 Interval 154 (229500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3673 3 episodes - episode_reward: 212.750 
[181.326, 238.427] - loss: 9.838 - mae: 51.489 - mean_q: 67.073 Interval 155 (231000 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.3024 2 episodes - episode_reward: 206.990 [78.069, 335.912] - loss: 10.075 - mae: 50.749 - mean_q: 66.206 Interval 156 (232500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.3851 4 episodes - episode_reward: 154.002 [-168.132, 318.579] - loss: 9.647 - mae: 50.506 - mean_q: 66.112 Interval 157 (234000 steps performed) 1500/1500 [==============================] - 14s 10ms/step - reward: 0.3110 2 episodes - episode_reward: 161.562 [148.867, 174.257] - loss: 9.449 - mae: 50.491 - mean_q: 66.010 Interval 158 (235500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.4570 3 episodes - episode_reward: 204.952 [138.049, 243.321] - loss: 9.977 - mae: 49.901 - mean_q: 65.339 Interval 159 (237000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.5362 3 episodes - episode_reward: 295.442 [242.949, 329.203] - loss: 10.456 - mae: 49.973 - mean_q: 65.587 Interval 160 (238500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.5346 4 episodes - episode_reward: 185.847 [-100.000, 294.440] - loss: 9.396 - mae: 50.274 - mean_q: 66.030 Interval 161 (240000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.1473 6 episodes - episode_reward: 61.526 [-263.155, 320.523] - loss: 8.437 - mae: 50.289 - mean_q: 66.163 Interval 162 (241500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.2412 4 episodes - episode_reward: 71.804 [-234.778, 216.613] - loss: 10.515 - mae: 50.325 - mean_q: 66.270 Interval 163 (243000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.5941 4 episodes - episode_reward: 224.261 [60.141, 293.190] - loss: 8.933 - mae: 50.367 - mean_q: 66.175 
Interval 164 (244500 steps performed) 1500/1500 [==============================] - 15s 10ms/step - reward: 0.1653 1 episodes - episode_reward: 218.669 [218.669, 218.669] - loss: 12.061 - mae: 50.579 - mean_q: 66.509 Interval 165 (246000 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: -0.4601 7 episodes - episode_reward: -85.702 [-626.593, 278.271] - loss: 11.815 - mae: 50.470 - mean_q: 66.268 Interval 166 (247500 steps performed) 1500/1500 [==============================] - 14s 9ms/step - reward: 0.0750 5 episodes - episode_reward: 24.372 [-137.668, 273.204] - loss: 16.007 - mae: 50.804 - mean_q: 66.581 Interval 167 (249000 steps performed) 1000/1500 [===================>..........] - ETA: 4s - reward: 0.0330done, took 2383.690 seconds
# Persist this run's weights and keep a handle to the trained agent.
# NOTE(review): `models` was initialised as a dict near the top of the file,
# but is used here with list semantics (.append / [-1]) — presumably
# reassigned to a list in an unseen cell; confirm before reuse.
weights.append('dqn_lunar_weights_seven.h5f')  # constant name: plain string, not an f-string
models.append(dqn)
models[-1].load_weights(weights[-1])
dqn.save_weights(weights_filename, overwrite=True)

# Plot per-episode return: raw series in light gray, 50-episode rolling mean in black.
# Fixed: the original fused `pd.DataFrame(history.history)` and the `ax = ...`
# assignment onto one line, dropping the `df =` binding entirely (SyntaxError).
df = pd.DataFrame(history.history)
ax = df['episode_reward'].plot(color='lightgray')
df['episode_reward'].rolling(50).mean().plot(color='black')
ax.set_xlabel("Episode")
plt.ylabel("Rolling Mean (50) Cumulative Return")  # fixed: label said (10) but the window is 50
plt.show()
We see an immediate positive result. Although the rolling average hasn't increased massively, we are seeing many more large positive numbers.
I decided to increase both the memory and the log interval again, to see if this would continue to improve performance.
# Model 8 configuration record:
# [window_length, network layout, nb_steps, memory limit, log_interval, result placeholder]
rl['Model 8'] = [6, '128/64/32', 250000, 200000, 2500, None]

# Replay buffer doubled to 200k transitions; window_length=6 stacks the last
# six observations into each agent state.
memory = SequentialMemory(limit=200000, window_length=6)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=30,
               # target_model_update < 1 — presumably a soft (Polyak) target
               # update rate in keras-rl; confirm against its docs.
               target_model_update=1e-5, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# File names are constants — no interpolation needed, so plain strings
# (the originals were f-strings with no placeholders).
weights_filename = 'dqn_lunar_weights_eight.h5f'
checkpoint_weights_filename = 'dqn_lunar_weights_{step}.h5f'  # '{step}' filled in by the checkpoint callback
log_filename = 'dqn_lunar_log.json'
callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
callbacks += [FileLogger(log_filename, interval=100)]

# Time the full training run; fit reports progress every 2500 steps.
start_time = time.time()
history = dqn.fit(env, callbacks=callbacks, nb_steps=250000, log_interval=2500)
end_time = time.time()
Training for 250000 steps ... Interval 1 (0 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: -0.0401 6 episodes - episode_reward: 10.497 [-348.233, 277.555] - loss: 4.766 - mae: 49.380 - mean_q: 55.695 Interval 2 (2500 steps performed) 2500/2500 [==============================] - 19s 7ms/step - reward: 0.4039 5 episodes - episode_reward: 137.459 [-105.190, 266.944] - loss: 3.963 - mae: 49.627 - mean_q: 64.135 Interval 3 (5000 steps performed) 2500/2500 [==============================] - 18s 7ms/step - reward: 0.5645 6 episodes - episode_reward: 229.995 [178.545, 277.034] - loss: 3.895 - mae: 50.012 - mean_q: 65.824 Interval 4 (7500 steps performed) 2500/2500 [==============================] - 18s 7ms/step - reward: 0.7021 9 episodes - episode_reward: 206.295 [-104.183, 280.230] - loss: 5.334 - mae: 51.652 - mean_q: 68.259 Interval 5 (10000 steps performed) 2500/2500 [==============================] - 18s 7ms/step - reward: 0.7029 7 episodes - episode_reward: 253.416 [208.854, 330.202] - loss: 4.421 - mae: 52.071 - mean_q: 69.090 Interval 6 (12500 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.0566 4 episodes - episode_reward: 19.189 [-350.440, 242.466] - loss: 5.093 - mae: 52.759 - mean_q: 70.133 Interval 7 (15000 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: 0.2432 8 episodes - episode_reward: 72.695 [-288.466, 297.464] - loss: 5.179 - mae: 50.659 - mean_q: 67.177 Interval 8 (17500 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: 0.6699 9 episodes - episode_reward: 200.583 [-107.916, 274.224] - loss: 5.656 - mae: 50.143 - mean_q: 66.649 Interval 9 (20000 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.3467 5 episodes - episode_reward: 149.660 [-140.886, 248.542] - loss: 6.668 - mae: 50.179 - mean_q: 66.765 Interval 10 (22500 steps performed) 2500/2500 
[==============================] - 19s 8ms/step - reward: 0.2917 4 episodes - episode_reward: 236.821 [195.618, 275.891] - loss: 6.160 - mae: 48.872 - mean_q: 65.231 Interval 11 (25000 steps performed) 2500/2500 [==============================] - 19s 7ms/step - reward: 0.4461 8 episodes - episode_reward: 134.744 [-302.449, 270.255] - loss: 6.660 - mae: 49.362 - mean_q: 65.928 Interval 12 (27500 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: 0.6188 7 episodes - episode_reward: 208.060 [-10.967, 292.226] - loss: 6.897 - mae: 49.787 - mean_q: 66.533 Interval 13 (30000 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: 0.5555 7 episodes - episode_reward: 176.430 [-141.857, 277.534] - loss: 6.087 - mae: 50.195 - mean_q: 67.113 Interval 14 (32500 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.6063 7 episodes - episode_reward: 248.006 [184.690, 314.851] - loss: 6.698 - mae: 50.284 - mean_q: 67.280 Interval 15 (35000 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: 0.6106 9 episodes - episode_reward: 167.401 [-110.697, 307.701] - loss: 5.992 - mae: 50.468 - mean_q: 67.506 Interval 16 (37500 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: 0.8027 8 episodes - episode_reward: 238.055 [195.053, 287.714] - loss: 6.507 - mae: 50.690 - mean_q: 67.806 Interval 17 (40000 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.6337 8 episodes - episode_reward: 208.414 [-81.880, 335.293] - loss: 5.952 - mae: 50.855 - mean_q: 68.079 Interval 18 (42500 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: 0.5663 9 episodes - episode_reward: 155.604 [-130.699, 264.406] - loss: 5.808 - mae: 50.910 - mean_q: 68.116 Interval 19 (45000 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.3677 7 episodes - 
episode_reward: 133.095 [-255.838, 297.347] - loss: 7.251 - mae: 51.276 - mean_q: 68.630 Interval 20 (47500 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.0442 11 episodes - episode_reward: 14.289 [-397.560, 290.693] - loss: 7.665 - mae: 51.565 - mean_q: 68.908 Interval 21 (50000 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.2419 5 episodes - episode_reward: 94.595 [-425.619, 253.563] - loss: 10.698 - mae: 51.874 - mean_q: 69.156 Interval 22 (52500 steps performed) 2500/2500 [==============================] - 21s 9ms/step - reward: 0.1798 6 episodes - episode_reward: 82.772 [-231.136, 251.912] - loss: 9.717 - mae: 51.874 - mean_q: 69.164 Interval 23 (55000 steps performed) 2500/2500 [==============================] - 23s 9ms/step - reward: 0.6157 7 episodes - episode_reward: 227.448 [-36.109, 320.286] - loss: 10.959 - mae: 51.923 - mean_q: 69.258 Interval 24 (57500 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: 0.4279 10 episodes - episode_reward: 106.389 [-291.839, 285.924] - loss: 8.541 - mae: 52.060 - mean_q: 69.379 Interval 25 (60000 steps performed) 2500/2500 [==============================] - 21s 8ms/step - reward: 0.4857 12 episodes - episode_reward: 96.294 [-100.336, 277.493] - loss: 9.654 - mae: 52.111 - mean_q: 69.412 Interval 26 (62500 steps performed) 2500/2500 [==============================] - 21s 9ms/step - reward: 0.4785 8 episodes - episode_reward: 163.646 [-103.628, 288.147] - loss: 10.449 - mae: 52.370 - mean_q: 69.690 Interval 27 (65000 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: 0.6204 10 episodes - episode_reward: 155.360 [-132.528, 263.998] - loss: 10.172 - mae: 52.392 - mean_q: 69.779 Interval 28 (67500 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: 0.4512 7 episodes - episode_reward: 131.618 [-261.526, 300.593] - loss: 11.336 - mae: 52.517 - 
mean_q: 69.901 Interval 29 (70000 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: 0.7662 9 episodes - episode_reward: 238.132 [173.775, 288.733] - loss: 9.099 - mae: 52.402 - mean_q: 69.914 Interval 30 (72500 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: 0.4783 10 episodes - episode_reward: 103.068 [-200.275, 285.722] - loss: 10.371 - mae: 52.533 - mean_q: 69.948 Interval 31 (75000 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: 0.4413 10 episodes - episode_reward: 113.101 [-236.982, 323.497] - loss: 9.053 - mae: 52.607 - mean_q: 69.985 Interval 32 (77500 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: 0.5889 8 episodes - episode_reward: 187.315 [-86.806, 276.687] - loss: 9.973 - mae: 52.664 - mean_q: 69.973 Interval 33 (80000 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: 0.8608 9 episodes - episode_reward: 245.417 [-44.515, 331.840] - loss: 9.283 - mae: 53.007 - mean_q: 70.403 Interval 34 (82500 steps performed) 2500/2500 [==============================] - 23s 9ms/step - reward: -0.1372 11 episodes - episode_reward: -31.823 [-367.718, 214.107] - loss: 10.273 - mae: 52.970 - mean_q: 70.373 Interval 35 (85000 steps performed) 2500/2500 [==============================] - 25s 10ms/step - reward: 0.1158 1 episodes - episode_reward: 160.876 [160.876, 160.876] - loss: 8.915 - mae: 52.474 - mean_q: 69.675 Interval 36 (87500 steps performed) 2500/2500 [==============================] - 31s 13ms/step - reward: -0.0543 Interval 37 (90000 steps performed) 2500/2500 [==============================] - 24s 10ms/step - reward: 0.4862 8 episodes - episode_reward: 148.754 [-221.076, 322.764] - loss: 10.111 - mae: 51.477 - mean_q: 68.397 Interval 38 (92500 steps performed) 2500/2500 [==============================] - 24s 10ms/step - reward: 0.2503 6 episodes - episode_reward: 101.485 [-219.275, 
317.161] - loss: 8.091 - mae: 51.543 - mean_q: 68.567 Interval 39 (95000 steps performed) 2500/2500 [==============================] - 21s 9ms/step - reward: 0.5490 8 episodes - episode_reward: 162.114 [-174.299, 312.301] - loss: 11.063 - mae: 51.529 - mean_q: 68.429 Interval 40 (97500 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.5147 6 episodes - episode_reward: 234.260 [163.084, 283.779] - loss: 7.379 - mae: 51.647 - mean_q: 68.539 Interval 41 (100000 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: 0.6150 8 episodes - episode_reward: 179.692 [-100.000, 333.487] - loss: 7.831 - mae: 51.628 - mean_q: 68.558 Interval 42 (102500 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.4595 7 episodes - episode_reward: 172.209 [-100.000, 331.636] - loss: 8.441 - mae: 51.702 - mean_q: 68.782 Interval 43 (105000 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: 0.6060 8 episodes - episode_reward: 192.932 [-218.159, 295.607] - loss: 8.313 - mae: 51.725 - mean_q: 68.804 Interval 44 (107500 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.4306 11 episodes - episode_reward: 94.976 [-100.000, 303.383] - loss: 8.149 - mae: 51.760 - mean_q: 68.811 Interval 45 (110000 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.5777 8 episodes - episode_reward: 179.726 [5.385, 279.052] - loss: 10.323 - mae: 51.973 - mean_q: 69.068 Interval 46 (112500 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: 0.4046 8 episodes - episode_reward: 119.407 [-262.147, 279.323] - loss: 8.013 - mae: 52.067 - mean_q: 69.285 Interval 47 (115000 steps performed) 2500/2500 [==============================] - 21s 8ms/step - reward: 0.5059 5 episodes - episode_reward: 263.476 [232.681, 289.336] - loss: 9.529 - mae: 52.156 - mean_q: 69.404 Interval 48 (117500 
steps performed) 2500/2500 [==============================] - 25s 10ms/step - reward: -0.0274 Interval 49 (120000 steps performed) 2500/2500 [==============================] - 30s 12ms/step - reward: -0.0267 Interval 50 (122500 steps performed) 2500/2500 [==============================] - 30s 12ms/step - reward: 0.1835 5 episodes - episode_reward: 52.437 [-107.010, 216.620] - loss: 9.238 - mae: 50.743 - mean_q: 67.559 Interval 51 (125000 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: 0.5552 8 episodes - episode_reward: 189.332 [-128.802, 291.575] - loss: 7.866 - mae: 50.636 - mean_q: 67.410 Interval 52 (127500 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: 0.6451 8 episodes - episode_reward: 181.355 [-100.000, 314.658] - loss: 8.851 - mae: 50.718 - mean_q: 67.534 Interval 53 (130000 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: 0.4214 8 episodes - episode_reward: 139.601 [-182.181, 279.265] - loss: 9.056 - mae: 50.932 - mean_q: 67.921 Interval 54 (132500 steps performed) 2500/2500 [==============================] - 28s 11ms/step - reward: -0.0322 Interval 55 (135000 steps performed) 2500/2500 [==============================] - 25s 10ms/step - reward: 0.4826 8 episodes - episode_reward: 131.323 [-166.159, 301.259] - loss: 7.967 - mae: 50.319 - mean_q: 67.091 Interval 56 (137500 steps performed) 2500/2500 [==============================] - 26s 10ms/step - reward: 0.6724 7 episodes - episode_reward: 249.929 [194.725, 278.681] - loss: 7.869 - mae: 50.471 - mean_q: 67.260 Interval 57 (140000 steps performed) 2500/2500 [==============================] - 28s 11ms/step - reward: 0.5173 9 episodes - episode_reward: 148.514 [-122.907, 293.273] - loss: 7.174 - mae: 50.480 - mean_q: 67.213 Interval 58 (142500 steps performed) 2500/2500 [==============================] - 28s 11ms/step - reward: 0.8727 10 episodes - episode_reward: 226.417 [-91.712, 317.289] - loss: 
8.645 - mae: 50.674 - mean_q: 67.481 Interval 59 (145000 steps performed) 2500/2500 [==============================] - 28s 11ms/step - reward: 0.3113 11 episodes - episode_reward: 58.181 [-280.874, 275.826] - loss: 8.642 - mae: 50.785 - mean_q: 67.645 Interval 60 (147500 steps performed) 2500/2500 [==============================] - 28s 11ms/step - reward: 0.7920 9 episodes - episode_reward: 213.146 [-14.164, 315.146] - loss: 8.933 - mae: 50.966 - mean_q: 67.888 Interval 61 (150000 steps performed) 2500/2500 [==============================] - 29s 11ms/step - reward: 0.7265 9 episodes - episode_reward: 222.821 [-100.377, 310.458] - loss: 8.066 - mae: 51.089 - mean_q: 67.984 Interval 62 (152500 steps performed) 2500/2500 [==============================] - 29s 12ms/step - reward: 0.6146 8 episodes - episode_reward: 170.892 [-48.792, 308.484] - loss: 8.251 - mae: 51.249 - mean_q: 68.249 Interval 63 (155000 steps performed) 2500/2500 [==============================] - 29s 12ms/step - reward: 0.5403 9 episodes - episode_reward: 165.059 [-186.326, 282.043] - loss: 8.150 - mae: 51.151 - mean_q: 68.099 Interval 64 (157500 steps performed) 2500/2500 [==============================] - 29s 12ms/step - reward: 0.2836 11 episodes - episode_reward: 69.112 [-306.907, 330.109] - loss: 8.895 - mae: 51.408 - mean_q: 68.377 Interval 65 (160000 steps performed) 2500/2500 [==============================] - 29s 12ms/step - reward: 0.5867 7 episodes - episode_reward: 185.139 [-179.952, 285.599] - loss: 7.721 - mae: 51.408 - mean_q: 68.394 Interval 66 (162500 steps performed) 2500/2500 [==============================] - 29s 12ms/step - reward: 0.6555 9 episodes - episode_reward: 183.120 [-76.921, 287.673] - loss: 9.293 - mae: 51.515 - mean_q: 68.612 Interval 67 (165000 steps performed) 2500/2500 [==============================] - 31s 12ms/step - reward: 0.2085 7 episodes - episode_reward: 76.549 [-228.385, 261.471] - loss: 9.700 - mae: 51.687 - mean_q: 68.812 Interval 68 (167500 steps 
performed) 2500/2500 [==============================] - 39s 16ms/step - reward: -0.0297 Interval 69 (170000 steps performed) 2500/2500 [==============================] - 48s 19ms/step - reward: -0.0310 Interval 70 (172500 steps performed) 2500/2500 [==============================] - 50s 20ms/step - reward: 0.1843 2 episodes - episode_reward: 144.083 [12.784, 275.382] - loss: 6.927 - mae: 50.501 - mean_q: 67.297 Interval 71 (175000 steps performed) 2500/2500 [==============================] - 32s 13ms/step - reward: 0.5605 7 episodes - episode_reward: 196.647 [-100.000, 287.092] - loss: 7.699 - mae: 50.500 - mean_q: 67.228 Interval 72 (177500 steps performed) 2500/2500 [==============================] - 32s 13ms/step - reward: 0.5133 10 episodes - episode_reward: 128.916 [-178.919, 334.490] - loss: 8.331 - mae: 50.477 - mean_q: 67.216 Interval 73 (180000 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.3429 5 episodes - episode_reward: 201.357 [35.817, 322.268] - loss: 6.428 - mae: 50.472 - mean_q: 67.218 Interval 74 (182500 steps performed) 2500/2500 [==============================] - 37s 15ms/step - reward: 0.0219 Interval 75 (185000 steps performed) 2500/2500 [==============================] - 38s 15ms/step - reward: 0.1718 4 episodes - episode_reward: 93.166 [15.036, 206.537] - loss: 7.891 - mae: 50.004 - mean_q: 66.514 Interval 76 (187500 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: 0.7328 8 episodes - episode_reward: 221.303 [86.558, 311.557] - loss: 8.617 - mae: 50.077 - mean_q: 66.731 Interval 77 (190000 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.6213 8 episodes - episode_reward: 203.228 [-261.944, 286.307] - loss: 7.478 - mae: 49.955 - mean_q: 66.590 Interval 78 (192500 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: 0.7879 9 episodes - episode_reward: 228.373 [-63.992, 331.686] - loss: 7.582 - 
mae: 50.110 - mean_q: 66.808 Interval 79 (195000 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.7428 7 episodes - episode_reward: 245.491 [194.797, 276.787] - loss: 7.474 - mae: 50.264 - mean_q: 66.997 Interval 80 (197500 steps performed) 2500/2500 [==============================] - 37s 15ms/step - reward: 0.6603 8 episodes - episode_reward: 204.030 [-124.997, 302.885] - loss: 7.750 - mae: 50.311 - mean_q: 67.131 Interval 81 (200000 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.6874 10 episodes - episode_reward: 178.893 [-211.815, 335.129] - loss: 8.251 - mae: 50.458 - mean_q: 67.274 Interval 82 (202500 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: 0.3241 7 episodes - episode_reward: 109.868 [-144.025, 294.114] - loss: 9.147 - mae: 50.473 - mean_q: 67.221 Interval 83 (205000 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: 0.6891 11 episodes - episode_reward: 173.461 [-309.883, 295.287] - loss: 8.134 - mae: 50.431 - mean_q: 67.200 Interval 84 (207500 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: 0.2781 11 episodes - episode_reward: 54.727 [-235.540, 270.607] - loss: 8.194 - mae: 50.585 - mean_q: 67.342 Interval 85 (210000 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: 0.8617 10 episodes - episode_reward: 210.206 [-103.342, 340.423] - loss: 6.858 - mae: 50.427 - mean_q: 67.093 Interval 86 (212500 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.3431 10 episodes - episode_reward: 102.522 [-238.973, 308.620] - loss: 8.631 - mae: 50.753 - mean_q: 67.647 Interval 87 (215000 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.6523 6 episodes - episode_reward: 263.205 [205.676, 315.042] - loss: 7.970 - mae: 50.803 - mean_q: 67.643 Interval 88 (217500 steps 
performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.5921 7 episodes - episode_reward: 183.393 [-83.356, 317.158] - loss: 7.150 - mae: 50.814 - mean_q: 67.699 Interval 89 (220000 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.6463 9 episodes - episode_reward: 194.737 [-31.206, 314.145] - loss: 7.614 - mae: 50.970 - mean_q: 67.837 Interval 90 (222500 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.5483 12 episodes - episode_reward: 117.167 [-202.976, 308.435] - loss: 9.140 - mae: 51.265 - mean_q: 68.248 Interval 91 (225000 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.4207 10 episodes - episode_reward: 105.425 [-163.316, 271.564] - loss: 8.529 - mae: 51.354 - mean_q: 68.229 Interval 92 (227500 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.1351 10 episodes - episode_reward: 19.416 [-259.979, 277.950] - loss: 9.706 - mae: 51.354 - mean_q: 68.073 Interval 93 (230000 steps performed) 2500/2500 [==============================] - 38s 15ms/step - reward: 0.0961 2 episodes - episode_reward: 198.062 [129.069, 267.056] - loss: 8.472 - mae: 51.127 - mean_q: 67.935 Interval 94 (232500 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.7252 6 episodes - episode_reward: 271.927 [195.090, 334.012] - loss: 9.006 - mae: 50.903 - mean_q: 67.653 Interval 95 (235000 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.5370 12 episodes - episode_reward: 128.964 [-158.958, 282.500] - loss: 7.655 - mae: 51.133 - mean_q: 67.979 Interval 96 (237500 steps performed) 2500/2500 [==============================] - 37s 15ms/step - reward: 0.2745 3 episodes - episode_reward: 218.511 [177.625, 270.935] - loss: 7.678 - mae: 51.110 - mean_q: 67.852 Interval 97 (240000 steps performed) 2500/2500 [==============================] - 35s 
14ms/step - reward: 0.6472 10 episodes - episode_reward: 145.338 [-212.061, 304.728] - loss: 9.394 - mae: 51.013 - mean_q: 67.721 Interval 98 (242500 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.8403 8 episodes - episode_reward: 277.171 [221.032, 334.001] - loss: 8.568 - mae: 50.922 - mean_q: 67.541 Interval 99 (245000 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: 0.4670 10 episodes - episode_reward: 114.907 [-145.984, 305.470] - loss: 9.541 - mae: 51.023 - mean_q: 67.620 Interval 100 (247500 steps performed) 2500/2500 [==============================] - 37s 15ms/step - reward: 0.6382 done, took 2706.556 seconds
# Persist the trained agent's weights and register model 8 for later testing
dqn.save_weights(weights_filename, overwrite=True)
weights.append('dqn_lunar_weights_eight.h5f')  # plain string: no placeholders, f-prefix was unnecessary
models.append(dqn)
models[-1].load_weights(weights[-1])
# Plot raw per-episode rewards (light gray) with a 50-episode rolling mean (black)
df = pd.DataFrame(history.history)
ax = df['episode_reward'].plot(color='lightgray')
df['episode_reward'].rolling(50).mean().plot(color='black')
ax.set_xlabel("Episode")
# Label fixed to match the actual 50-episode rolling window (was "(10)")
plt.ylabel("Rolling Mean (50) Cumulative Return")
plt.show()
The rolling average is now sitting between +100 and +200 reward, a massive leap from our previous model.
Out of curiosity I decided to see if we would get a similar performance improvement with a window length of 4.
# Save the model-8 training history to disk for later comparison
df.to_csv('lunar_training_weights_8')
# Register model 9's hyperparameters in the results table, in row order:
# window length, architecture, training steps, memory limit, log interval,
# average reward (None for now; filled in after testing)
rl['Model 9'] = [4, '128/64/32', 250000, 200000, 2500, None]
# Model 9: same 128/64/32 fully-connected architecture as model 8,
# but with a window length of 4 instead of 6.
model = Sequential([
    Flatten(input_shape=(4,) + env.observation_space.shape),
    Dense(128),
    Activation('relu'),
    Dense(64),
    Activation('relu'),
    Dense(32),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])
# Replay memory (200k transitions, 4-frame window) and epsilon-greedy exploration
memory = SequentialMemory(limit=200000, window_length=4)
policy = EpsGreedyQPolicy()
# Assemble and compile the DQN agent
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=30,
               target_model_update=1e-5, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
# Output filenames for final weights, interval checkpoints, and the JSON log
weights_filename = f'dqn_lunar_weights_nine.h5f'
checkpoint_weights_filename = 'dqn_lunar_weights_{step}.h5f'
log_filename = f'dqn_lunar_log.json'
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000),
    FileLogger(log_filename, interval=100),
]
# Train for 250,000 steps and time the run
start_time = time.time()
history = dqn.fit(env, callbacks=callbacks, nb_steps=250000, log_interval=2500)
end_time = time.time()
Training for 250000 steps ... Interval 1 (0 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: -2.0730 19 episodes - episode_reward: -268.850 [-653.081, -81.307] - loss: 28.108 - mae: 4.082 - mean_q: -0.922 Interval 2 (2500 steps performed) 2500/2500 [==============================] - 18s 7ms/step - reward: -1.0069 14 episodes - episode_reward: -178.005 [-456.257, 49.384] - loss: 18.312 - mae: 3.150 - mean_q: 0.200 Interval 3 (5000 steps performed) 2500/2500 [==============================] - 18s 7ms/step - reward: -0.6521 8 episodes - episode_reward: -217.360 [-493.468, 192.192] - loss: 14.399 - mae: 2.962 - mean_q: 0.313 Interval 4 (7500 steps performed) 2500/2500 [==============================] - 17s 7ms/step - reward: -1.0356 15 episodes - episode_reward: -172.364 [-391.492, 222.306] - loss: 14.128 - mae: 2.803 - mean_q: 0.191 Interval 5 (10000 steps performed) 2500/2500 [==============================] - 17s 7ms/step - reward: -1.4603 15 episodes - episode_reward: -240.625 [-632.908, -100.529] - loss: 16.966 - mae: 2.946 - mean_q: 0.071 Interval 6 (12500 steps performed) 2500/2500 [==============================] - 18s 7ms/step - reward: -1.2630 17 episodes - episode_reward: -180.565 [-585.810, 216.261] - loss: 15.826 - mae: 2.864 - mean_q: 0.029 Interval 7 (15000 steps performed) 2500/2500 [==============================] - 17s 7ms/step - reward: -1.8163 17 episodes - episode_reward: -275.413 [-751.192, -33.819] - loss: 17.835 - mae: 2.877 - mean_q: -0.068 Interval 8 (17500 steps performed) 2500/2500 [==============================] - 18s 7ms/step - reward: -0.9975 13 episodes - episode_reward: -189.348 [-414.346, 32.570] - loss: 17.547 - mae: 2.871 - mean_q: -0.131 Interval 9 (20000 steps performed) 2500/2500 [==============================] - 17s 7ms/step - reward: -1.1763 16 episodes - episode_reward: -184.993 [-446.058, -43.044] - loss: 17.503 - mae: 2.795 - mean_q: -0.153 Interval 10 (22500 steps performed) 2500/2500 
[==============================] - 18s 7ms/step - reward: -0.9347 11 episodes - episode_reward: -212.757 [-493.538, -53.308] - loss: 18.126 - mae: 2.853 - mean_q: -0.150 Interval 11 (25000 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: -1.3483 18 episodes - episode_reward: -191.736 [-532.978, 16.681] - loss: 17.360 - mae: 2.848 - mean_q: -0.164 Interval 12 (27500 steps performed) 2500/2500 [==============================] - 21s 8ms/step - reward: -1.3195 16 episodes - episode_reward: -200.854 [-528.024, 187.824] - loss: 19.079 - mae: 2.914 - mean_q: -0.124 Interval 13 (30000 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: -1.5450 13 episodes - episode_reward: -293.053 [-552.517, -65.656] - loss: 18.282 - mae: 2.884 - mean_q: -0.122 Interval 14 (32500 steps performed) 2500/2500 [==============================] - 18s 7ms/step - reward: -1.2041 16 episodes - episode_reward: -190.591 [-502.666, -25.342] - loss: 18.966 - mae: 2.848 - mean_q: -0.212 Interval 15 (35000 steps performed) 2500/2500 [==============================] - 18s 7ms/step - reward: -0.7356 11 episodes - episode_reward: -160.277 [-436.179, 299.590] - loss: 17.794 - mae: 2.857 - mean_q: -0.154 Interval 16 (37500 steps performed) 2500/2500 [==============================] - 18s 7ms/step - reward: -0.6529 12 episodes - episode_reward: -133.349 [-299.743, 201.540] - loss: 18.073 - mae: 2.826 - mean_q: -0.119 Interval 17 (40000 steps performed) 2500/2500 [==============================] - 18s 7ms/step - reward: -1.2420 18 episodes - episode_reward: -174.901 [-474.024, -74.155] - loss: 16.925 - mae: 2.820 - mean_q: -0.127 Interval 18 (42500 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: -0.7472 9 episodes - episode_reward: -221.420 [-555.933, 142.983] - loss: 17.440 - mae: 2.783 - mean_q: -0.078 Interval 19 (45000 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: 
-0.9183 14 episodes - episode_reward: -162.724 [-442.534, 0.111] - loss: 17.830 - mae: 2.772 - mean_q: -0.074 Interval 20 (47500 steps performed) 2500/2500 [==============================] - 21s 9ms/step - reward: -0.9671 14 episodes - episode_reward: -172.657 [-390.389, -30.417] - loss: 17.075 - mae: 2.864 - mean_q: -0.095 Interval 21 (50000 steps performed) 2500/2500 [==============================] - 19s 8ms/step - reward: -0.8655 12 episodes - episode_reward: -172.439 [-393.150, -55.186] - loss: 15.982 - mae: 2.790 - mean_q: -0.075 Interval 22 (52500 steps performed) 2500/2500 [==============================] - 20s 8ms/step - reward: -0.9990 14 episodes - episode_reward: -182.588 [-438.703, -36.118] - loss: 15.855 - mae: 2.777 - mean_q: -0.089 Interval 23 (55000 steps performed) 2500/2500 [==============================] - 16s 6ms/step - reward: -0.6102 12 episodes - episode_reward: -125.480 [-302.955, 199.783] - loss: 17.321 - mae: 2.793 - mean_q: -0.114 Interval 24 (57500 steps performed) 2500/2500 [==============================] - 17s 7ms/step - reward: -0.6204 13 episodes - episode_reward: -113.232 [-336.965, 216.390] - loss: 16.976 - mae: 2.843 - mean_q: -0.075 Interval 25 (60000 steps performed) 2500/2500 [==============================] - 16s 6ms/step - reward: -0.8309 12 episodes - episode_reward: -183.336 [-308.670, 10.554] - loss: 17.725 - mae: 2.821 - mean_q: -0.015 Interval 26 (62500 steps performed) 2500/2500 [==============================] - 17s 7ms/step - reward: -0.6421 12 episodes - episode_reward: -132.512 [-254.523, -3.985] - loss: 16.100 - mae: 2.814 - mean_q: -0.001 Interval 27 (65000 steps performed) 2500/2500 [==============================] - 21s 8ms/step - reward: -1.3304 14 episodes - episode_reward: -235.628 [-873.744, -83.313] - loss: 17.978 - mae: 2.827 - mean_q: -0.044 Interval 28 (67500 steps performed) 2500/2500 [==============================] - 21s 8ms/step - reward: -0.5948 14 episodes - episode_reward: -106.634 [-247.890, 
176.427] - loss: 19.312 - mae: 2.868 - mean_q: -0.028 Interval 29 (70000 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: -0.7629 13 episodes - episode_reward: -147.820 [-320.356, -9.468] - loss: 17.009 - mae: 2.831 - mean_q: 0.031 Interval 30 (72500 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: -1.1009 13 episodes - episode_reward: -202.412 [-450.618, -99.075] - loss: 15.799 - mae: 2.801 - mean_q: 0.022 Interval 31 (75000 steps performed) 2500/2500 [==============================] - 21s 9ms/step - reward: -1.2249 16 episodes - episode_reward: -199.790 [-778.510, -37.183] - loss: 16.544 - mae: 2.863 - mean_q: 0.033 Interval 32 (77500 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: -0.9916 12 episodes - episode_reward: -206.800 [-417.958, -39.453] - loss: 17.136 - mae: 2.951 - mean_q: 0.016 Interval 33 (80000 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: -0.6503 11 episodes - episode_reward: -151.908 [-254.366, -6.610] - loss: 15.925 - mae: 2.859 - mean_q: 0.021 Interval 34 (82500 steps performed) 2500/2500 [==============================] - 23s 9ms/step - reward: -0.7155 13 episodes - episode_reward: -137.994 [-538.878, -14.161] - loss: 16.669 - mae: 2.941 - mean_q: -0.020 Interval 35 (85000 steps performed) 2500/2500 [==============================] - 24s 9ms/step - reward: -0.6484 12 episodes - episode_reward: -131.235 [-287.175, 130.375] - loss: 16.368 - mae: 2.858 - mean_q: 0.029 Interval 36 (87500 steps performed) 2500/2500 [==============================] - 23s 9ms/step - reward: -1.1748 15 episodes - episode_reward: -194.574 [-423.744, -38.727] - loss: 15.091 - mae: 2.893 - mean_q: 0.057 Interval 37 (90000 steps performed) 2500/2500 [==============================] - 23s 9ms/step - reward: -0.8703 13 episodes - episode_reward: -169.629 [-310.674, -36.255] - loss: 15.730 - mae: 2.851 - mean_q: 0.041 Interval 38 
(92500 steps performed) 2500/2500 [==============================] - 23s 9ms/step - reward: -1.6853 14 episodes - episode_reward: -297.826 [-677.031, -27.467] - loss: 16.840 - mae: 2.947 - mean_q: 0.023 Interval 39 (95000 steps performed) 2500/2500 [==============================] - 24s 10ms/step - reward: -1.0146 10 episodes - episode_reward: -243.730 [-391.064, -20.839] - loss: 15.060 - mae: 2.849 - mean_q: 0.052 Interval 40 (97500 steps performed) 2500/2500 [==============================] - 23s 9ms/step - reward: -0.9317 14 episodes - episode_reward: -171.357 [-287.674, -83.606] - loss: 17.466 - mae: 2.949 - mean_q: 0.043 Interval 41 (100000 steps performed) 2500/2500 [==============================] - 22s 9ms/step - reward: -0.7490 14 episodes - episode_reward: -137.594 [-355.791, 22.129] - loss: 16.295 - mae: 3.007 - mean_q: 0.023 Interval 42 (102500 steps performed) 2500/2500 [==============================] - 23s 9ms/step - reward: -0.8939 12 episodes - episode_reward: -176.792 [-413.367, -45.491] - loss: 14.855 - mae: 2.918 - mean_q: 0.061 Interval 43 (105000 steps performed) 2500/2500 [==============================] - 25s 10ms/step - reward: -0.6088 15 episodes - episode_reward: -109.189 [-301.431, 319.364] - loss: 14.917 - mae: 2.963 - mean_q: 0.095 Interval 44 (107500 steps performed) 2500/2500 [==============================] - 25s 10ms/step - reward: -0.9672 14 episodes - episode_reward: -172.160 [-354.177, 201.947] - loss: 15.220 - mae: 2.947 - mean_q: 0.059 Interval 45 (110000 steps performed) 2500/2500 [==============================] - 24s 10ms/step - reward: -0.7947 14 episodes - episode_reward: -146.812 [-407.870, 24.074] - loss: 15.449 - mae: 2.968 - mean_q: 0.071 Interval 46 (112500 steps performed) 2500/2500 [==============================] - 25s 10ms/step - reward: -0.3914 10 episodes - episode_reward: -107.438 [-299.709, 23.040] - loss: 14.201 - mae: 2.914 - mean_q: 0.137 Interval 47 (115000 steps performed) 2500/2500 
[==============================] - 25s 10ms/step - reward: -0.4000 10 episodes - episode_reward: -89.810 [-274.030, 269.002] - loss: 16.541 - mae: 2.975 - mean_q: 0.113 Interval 48 (117500 steps performed) 2500/2500 [==============================] - 25s 10ms/step - reward: -0.5942 12 episodes - episode_reward: -100.939 [-233.754, 185.618] - loss: 14.593 - mae: 2.946 - mean_q: 0.141 Interval 49 (120000 steps performed) 2500/2500 [==============================] - 26s 10ms/step - reward: -0.3485 10 episodes - episode_reward: -100.917 [-307.021, 167.634] - loss: 15.597 - mae: 2.998 - mean_q: 0.186 Interval 50 (122500 steps performed) 2500/2500 [==============================] - 26s 10ms/step - reward: -1.1537 13 episodes - episode_reward: -218.119 [-354.857, -100.000] - loss: 14.282 - mae: 2.975 - mean_q: 0.187 Interval 51 (125000 steps performed) 2500/2500 [==============================] - 26s 10ms/step - reward: -1.3856 17 episodes - episode_reward: -204.320 [-602.774, -57.328] - loss: 16.896 - mae: 3.046 - mean_q: 0.138 Interval 52 (127500 steps performed) 2500/2500 [==============================] - 26s 10ms/step - reward: -0.4418 9 episodes - episode_reward: -133.573 [-303.958, 310.939] - loss: 16.596 - mae: 3.075 - mean_q: 0.141 Interval 53 (130000 steps performed) 2500/2500 [==============================] - 27s 11ms/step - reward: -0.2600 9 episodes - episode_reward: -67.654 [-204.541, 262.222] - loss: 14.872 - mae: 3.046 - mean_q: 0.197 Interval 54 (132500 steps performed) 2500/2500 [==============================] - 26s 11ms/step - reward: -0.9376 11 episodes - episode_reward: -218.940 [-354.480, -84.967] - loss: 13.730 - mae: 2.978 - mean_q: 0.232 Interval 55 (135000 steps performed) 2500/2500 [==============================] - 26s 11ms/step - reward: -0.6966 14 episodes - episode_reward: -124.239 [-343.806, 187.734] - loss: 14.812 - mae: 2.970 - mean_q: 0.184 Interval 56 (137500 steps performed) 2500/2500 [==============================] - 27s 11ms/step 
- reward: -0.5951 11 episodes - episode_reward: -131.010 [-324.906, 230.720] - loss: 15.421 - mae: 3.042 - mean_q: 0.172 Interval 57 (140000 steps performed) 2500/2500 [==============================] - 28s 11ms/step - reward: -0.5397 9 episodes - episode_reward: -156.969 [-349.376, 161.294] - loss: 14.849 - mae: 3.036 - mean_q: 0.221 Interval 58 (142500 steps performed) 2500/2500 [==============================] - 27s 11ms/step - reward: -0.7289 11 episodes - episode_reward: -158.810 [-220.582, -94.887] - loss: 16.360 - mae: 3.145 - mean_q: 0.165 Interval 59 (145000 steps performed) 2500/2500 [==============================] - 28s 11ms/step - reward: -0.8553 10 episodes - episode_reward: -213.645 [-357.843, -55.831] - loss: 15.481 - mae: 3.035 - mean_q: 0.228 Interval 60 (147500 steps performed) 2500/2500 [==============================] - 28s 11ms/step - reward: -0.6017 14 episodes - episode_reward: -114.980 [-319.176, 148.685] - loss: 14.305 - mae: 3.010 - mean_q: 0.266 Interval 61 (150000 steps performed) 2500/2500 [==============================] - 28s 11ms/step - reward: -0.8068 15 episodes - episode_reward: -131.383 [-251.914, -32.931] - loss: 14.393 - mae: 3.085 - mean_q: 0.250 Interval 62 (152500 steps performed) 2500/2500 [==============================] - 28s 11ms/step - reward: -0.6326 13 episodes - episode_reward: -116.568 [-282.429, 197.123] - loss: 14.376 - mae: 3.098 - mean_q: 0.237 Interval 63 (155000 steps performed) 2500/2500 [==============================] - 29s 12ms/step - reward: -0.5361 12 episodes - episode_reward: -103.236 [-181.844, 152.023] - loss: 14.455 - mae: 3.165 - mean_q: 0.198 Interval 64 (157500 steps performed) 2500/2500 [==============================] - 29s 12ms/step - reward: -0.6713 10 episodes - episode_reward: -175.086 [-330.583, 181.670] - loss: 13.674 - mae: 3.114 - mean_q: 0.225 Interval 65 (160000 steps performed) 2500/2500 [==============================] - 29s 12ms/step - reward: -1.1699 15 episodes - episode_reward: 
-189.565 [-302.935, -103.390] - loss: 15.057 - mae: 3.168 - mean_q: 0.236 Interval 66 (162500 steps performed) 2500/2500 [==============================] - 30s 12ms/step - reward: -0.4355 12 episodes - episode_reward: -94.946 [-299.349, 158.197] - loss: 13.435 - mae: 3.079 - mean_q: 0.301 Interval 67 (165000 steps performed) 2500/2500 [==============================] - 31s 12ms/step - reward: -0.5566 10 episodes - episode_reward: -149.019 [-439.742, 182.131] - loss: 14.774 - mae: 3.185 - mean_q: 0.251 Interval 68 (167500 steps performed) 2500/2500 [==============================] - 30s 12ms/step - reward: -0.5519 13 episodes - episode_reward: -104.821 [-300.263, 263.498] - loss: 13.270 - mae: 3.146 - mean_q: 0.267 Interval 69 (170000 steps performed) 2500/2500 [==============================] - 30s 12ms/step - reward: -0.4373 11 episodes - episode_reward: -102.433 [-273.211, 167.064] - loss: 14.046 - mae: 3.215 - mean_q: 0.253 Interval 70 (172500 steps performed) 2500/2500 [==============================] - 30s 12ms/step - reward: -0.7699 12 episodes - episode_reward: -157.129 [-234.679, -78.911] - loss: 13.896 - mae: 3.227 - mean_q: 0.228 Interval 71 (175000 steps performed) 2500/2500 [==============================] - 32s 13ms/step - reward: -0.3458 10 episodes - episode_reward: -91.916 [-267.516, 235.170] - loss: 13.058 - mae: 3.221 - mean_q: 0.272 Interval 72 (177500 steps performed) 2500/2500 [==============================] - 31s 13ms/step - reward: -0.6747 12 episodes - episode_reward: -134.808 [-412.153, 282.039] - loss: 12.989 - mae: 3.203 - mean_q: 0.289 Interval 73 (180000 steps performed) 2500/2500 [==============================] - 31s 12ms/step - reward: -0.7099 11 episodes - episode_reward: -171.216 [-316.690, 284.389] - loss: 13.805 - mae: 3.246 - mean_q: 0.230 Interval 74 (182500 steps performed) 2500/2500 [==============================] - 32s 13ms/step - reward: -0.4216 9 episodes - episode_reward: -103.905 [-255.110, 204.927] - loss: 13.222 - 
mae: 3.243 - mean_q: 0.289 Interval 75 (185000 steps performed) 2500/2500 [==============================] - 32s 13ms/step - reward: -0.3871 12 episodes - episode_reward: -85.625 [-257.944, 247.041] - loss: 13.234 - mae: 3.266 - mean_q: 0.238 Interval 76 (187500 steps performed) 2500/2500 [==============================] - 33s 13ms/step - reward: -0.4279 8 episodes - episode_reward: -129.479 [-304.175, 152.295] - loss: 14.306 - mae: 3.314 - mean_q: 0.240 Interval 77 (190000 steps performed) 2500/2500 [==============================] - 32s 13ms/step - reward: -0.8921 15 episodes - episode_reward: -148.627 [-349.184, -58.883] - loss: 13.337 - mae: 3.300 - mean_q: 0.261 Interval 78 (192500 steps performed) 2500/2500 [==============================] - 32s 13ms/step - reward: -0.5893 8 episodes - episode_reward: -207.937 [-304.378, -103.347] - loss: 13.827 - mae: 3.320 - mean_q: 0.248 Interval 79 (195000 steps performed) 2500/2500 [==============================] - 32s 13ms/step - reward: -0.8995 11 episodes - episode_reward: -190.030 [-311.836, -76.316] - loss: 13.080 - mae: 3.270 - mean_q: 0.297 Interval 80 (197500 steps performed) 2500/2500 [==============================] - 33s 13ms/step - reward: -1.0762 15 episodes - episode_reward: -178.206 [-351.413, -30.012] - loss: 13.321 - mae: 3.356 - mean_q: 0.247 Interval 81 (200000 steps performed) 2500/2500 [==============================] - 34s 13ms/step - reward: -0.4181 11 episodes - episode_reward: -102.209 [-278.794, 174.838] - loss: 13.029 - mae: 3.377 - mean_q: 0.305 Interval 82 (202500 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: -0.4343 11 episodes - episode_reward: -108.748 [-297.301, 172.814] - loss: 13.034 - mae: 3.395 - mean_q: 0.210 Interval 83 (205000 steps performed) 2500/2500 [==============================] - 34s 13ms/step - reward: -0.5229 10 episodes - episode_reward: -108.647 [-306.475, 267.861] - loss: 12.833 - mae: 3.442 - mean_q: 0.226 Interval 84 (207500 
steps performed) 2500/2500 [==============================] - 33s 13ms/step - reward: -1.0630 17 episodes - episode_reward: -157.298 [-262.054, -63.326] - loss: 13.015 - mae: 3.417 - mean_q: 0.272 Interval 85 (210000 steps performed) 2500/2500 [==============================] - 34s 13ms/step - reward: -0.4441 9 episodes - episode_reward: -130.439 [-296.639, 170.227] - loss: 12.591 - mae: 3.418 - mean_q: 0.328 Interval 86 (212500 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: -0.3838 9 episodes - episode_reward: -109.471 [-318.986, 142.454] - loss: 13.937 - mae: 3.511 - mean_q: 0.266 Interval 87 (215000 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: -0.3085 11 episodes - episode_reward: -64.179 [-192.523, 156.684] - loss: 12.523 - mae: 3.458 - mean_q: 0.363 Interval 88 (217500 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: -0.3882 8 episodes - episode_reward: -128.091 [-405.627, 139.154] - loss: 13.594 - mae: 3.509 - mean_q: 0.272 Interval 89 (220000 steps performed) 2500/2500 [==============================] - 32s 13ms/step - reward: -0.4842 13 episodes - episode_reward: -84.537 [-226.164, 169.401] - loss: 13.538 - mae: 3.495 - mean_q: 0.343 Interval 90 (222500 steps performed) 2500/2500 [==============================] - 31s 13ms/step - reward: -0.4603 10 episodes - episode_reward: -130.215 [-297.711, 221.024] - loss: 12.677 - mae: 3.505 - mean_q: 0.340 Interval 91 (225000 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: -0.4159 9 episodes - episode_reward: -104.308 [-258.588, 158.180] - loss: 11.777 - mae: 3.484 - mean_q: 0.334 Interval 92 (227500 steps performed) 2500/2500 [==============================] - 33s 13ms/step - reward: -0.6167 11 episodes - episode_reward: -139.156 [-284.510, 1.464] - loss: 12.298 - mae: 3.544 - mean_q: 0.345 Interval 93 (230000 steps performed) 2500/2500 
[==============================] - 33s 13ms/step - reward: -0.7391 14 episodes - episode_reward: -133.667 [-281.227, -45.703] - loss: 11.412 - mae: 3.480 - mean_q: 0.441 Interval 94 (232500 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: -0.6884 12 episodes - episode_reward: -145.175 [-351.230, 176.167] - loss: 13.424 - mae: 3.579 - mean_q: 0.329 Interval 95 (235000 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: -0.7499 14 episodes - episode_reward: -132.759 [-240.374, -34.928] - loss: 11.199 - mae: 3.595 - mean_q: 0.368 Interval 96 (237500 steps performed) 2500/2500 [==============================] - 35s 14ms/step - reward: -0.8708 10 episodes - episode_reward: -218.735 [-458.875, -56.811] - loss: 12.193 - mae: 3.566 - mean_q: 0.431 Interval 97 (240000 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: -0.5845 10 episodes - episode_reward: -148.322 [-338.923, 192.041] - loss: 12.541 - mae: 3.621 - mean_q: 0.408 Interval 98 (242500 steps performed) 2500/2500 [==============================] - 34s 14ms/step - reward: -0.3592 7 episodes - episode_reward: -124.005 [-343.460, 262.761] - loss: 11.274 - mae: 3.600 - mean_q: 0.395 Interval 99 (245000 steps performed) 2500/2500 [==============================] - 34s 13ms/step - reward: -0.5742 10 episodes - episode_reward: -140.920 [-348.387, 193.675] - loss: 11.408 - mae: 3.646 - mean_q: 0.447 Interval 100 (247500 steps performed) 2500/2500 [==============================] - 33s 13ms/step - reward: -0.2181 done, took 2586.386 seconds
# Persist the trained agent's weights and register model 9 for later testing
dqn.save_weights(weights_filename, overwrite=True)
weights.append('dqn_lunar_weights_nine.h5f')  # plain string: no placeholders, f-prefix was unnecessary
models.append(dqn)
models[-1].load_weights(weights[-1])
# Build the training-history DataFrame directly from the fit() results, as
# the model-8 block does. (Previously this read 'lunar_training_weights_9'
# from disk, but that file is only written after plotting, so the read would
# fail on a fresh run.)
df = pd.DataFrame(history.history)
ax = df['episode_reward'].plot(color='lightgray')
df['episode_reward'].rolling(50).mean().plot(color='black')
ax.set_xlabel("Episode")
# Label fixed to match the actual 50-episode rolling window (was "(10)")
plt.ylabel("Rolling Mean (50) Cumulative Return")
plt.show()
No comparable performance bump is seen. It is clear that the window length of 6 is key to our model's improvement, and that it was not just the result of the increased memory size and log interval.
df.to_csv('lunar_training_weights_9')
As a final step I would like to test each trained model for 20 episodes and look at the average reward. As keras-rl prints rewards to stdout when testing and has no facility to capture this output directly, I will redirect stdout to text files and then read the model output back in from those files.
import sys
from contextlib import redirect_stdout
# Kept so the later "sys.stdout = save" restore line still works unchanged
save = sys.stdout
# Test each of the 9 trained models for 20 episodes. keras-rl prints the
# per-episode rewards to stdout, so capture that output into one text file
# per model (reward_1.txt ... reward_9.txt).
for i in range(0, 9):
    env = LunarEirLander.LunarEirLander()
    name = 'reward_' + str(i + 1) + '.txt'
    # redirect_stdout guarantees the file is closed and stdout restored even
    # if .test() raises; the original open()/sys.stdout=.../close() pattern
    # leaked the handle and left stdout broken on any exception.
    with open(name, 'w+') as outfile, redirect_stdout(outfile):
        models[i].test(env, nb_episodes=20, visualize=False)
def get_file_reward(f):
    """Parse a captured keras-rl test log and return the per-episode rewards.

    Every line after the first "Testing for N episodes ..." header looks like
    ``Episode 1: reward: 211.500, steps: 186``; the reward is the last
    space-separated token of the first comma-separated field.

    Parameters
    ----------
    f : str
        Path to the text file containing the captured stdout.

    Returns
    -------
    list of float
        One reward per episode, in file order.
    """
    # 'with' guarantees the handle is closed even on a parse error
    # (the original opened the file and never closed it).
    with open(f) as logfile:
        lines = logfile.read().splitlines()
    # Skip the header line, then extract the reward token from each episode line.
    return [float(line.split(',')[0].split(' ')[-1]) for line in lines[1:]]
# Returning stdout to normal
sys.stdout = save
# Retrieve the saved per-episode rewards from the txt files (one per model)
rewards = []
for i in range(1, 10):
    rewards.append(np.array(get_file_reward('reward_' + str(i) + '.txt')))
# Record each model's mean test reward in the comparison table.
# Use .loc rather than chained indexing (rl[col][row] = ...), which raises
# SettingWithCopyWarning in pandas and can silently fail to write through.
for i, r in enumerate(rewards):
    rl.loc['Average Reward', 'Model ' + str(i + 1)] = r.mean()
rl
| | Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | Model 6 | Model 7 | Model 8 | Model 9 |
|---|---|---|---|---|---|---|---|---|---|
| Window Length | 1 | 1 | 4 | 6 | 6 | 4 | 6 | 6 | 4 |
| Architecture | 128/64/32 | 512/256/128 | 128/64/32 | 128/64/32 | 128/64/32 | 128/64/32 | 128/64/32 | 128/64/32 | 128/64/32 |
| Training Steps | 5000000 | 5000000 | 250000 | 250000 | 2000000 | 1999000 | 250000 | 250000 | 250000 |
| Memory Limit | 50000 | 50000 | 50000 | 50000 | 50000 | 50000 | 100000 | 200000 | 200000 |
| Log Interval | 500 | 500 | 500 | 500 | 500 | 500 | 1500 | 2500 | 2500 |
| Average Reward | -192.067 | -178.465 | -41.0041 | -247.724 | -225.78 | -454.22 | -7.6643 | 66.188 | -101.439 |
It is clear that model 8, with a window_length of 6 and the largest memory limit and log interval size, has performed the best. All other models have a negative reward average. It is also interesting to note that this model only trained for 250,000 steps, and is significantly outperforming models that trained for 5,000,000 steps. This serves to highlight the importance of parameter selection.
# saving model 8 as the selected model
# (models is 0-indexed, so models[7] is "Model 8" in the comparison table above)
chosen_dqn = models[7]
Deploy each of the two models trained to the Lunar Lander game to play 200 episodes and analyse the reward achieved by the models trained using each approach.
First I will test the CNN model on 200 episodes and save the returned reward.
# Load and initialise the control model.
# Input geometry the CNN was trained on: 32x32 grayscale frames.
ROWS = 32
COLS = 32
CHANNELS = 1
# Load a pre-trained model
model = keras.models.load_model("cnn_choice.mod")
# Load the Lunar Lander environment and initialise it
env = LunarEirLander.LunarEirLander()
s = env.reset()
# Run the game loop: 200 full episodes, recording the cumulative reward of each.
rewards = []
for i in range(200):
    total_reward = 0
    steps = 0
    done = False
    s = env.reset()
    while not done:
        # Access the rendered screen image
        raw_image = env.render(mode='rgb_array')
        # Prepare the image for presentation to the network - ensure this
        # matches how the model was trained (resize, grayscale, scale to [0, 1]).
        # NOTE: cv2.resize takes (width, height); safe here only because ROWS == COLS.
        processed_image = cv2.resize(raw_image, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)
        processed_image = cv2.cvtColor(processed_image, cv2.COLOR_RGB2GRAY)
        # np.float was removed in NumPy 1.24; builtin float keeps the same float64 dtype.
        processed_image = np.array(processed_image, dtype=float)
        processed_image = processed_image.reshape((-1, ROWS, COLS, CHANNELS))
        processed_image = processed_image / 255
        # Get the model to make a prediction (predicted class index = action)
        a = np.argmax(model.predict(processed_image), axis=-1)
        a = a[0]
        # Step on the game
        s, r, done, info = env.step(a)
        env.render()
        total_reward += r
        steps += 1
    env.close()
    # print(["{:+0.2f}".format(x) for x in s])
    print("step {} total_reward {:+0.2f}".format(steps, total_reward))
    rewards.append(total_reward)
step 97 total_reward -499.95 step 90 total_reward -467.13 step 58 total_reward -402.87 step 76 total_reward -517.58 step 177 total_reward -643.93 step 65 total_reward -604.49 step 106 total_reward -221.56 step 99 total_reward -633.50 step 51 total_reward -431.62 step 95 total_reward -438.57 step 78 total_reward -577.88 step 92 total_reward -349.71 step 80 total_reward -383.65 step 88 total_reward -340.22 step 145 total_reward -316.28 step 118 total_reward -405.85 step 98 total_reward -505.19 step 84 total_reward -461.57 step 53 total_reward -519.76 step 93 total_reward -455.70 step 126 total_reward -464.70 step 87 total_reward -803.66 step 76 total_reward -618.42 step 102 total_reward -494.10 step 104 total_reward -1115.38 step 101 total_reward -0.30 step 79 total_reward -165.36 step 200 total_reward -605.19 step 1 total_reward -100.00 step 175 total_reward -616.92 step 82 total_reward -600.49 step 105 total_reward -270.55 step 64 total_reward -552.41 step 78 total_reward -201.52 step 1 total_reward -100.00 step 65 total_reward -630.54 step 166 total_reward -487.23 step 72 total_reward -154.31 step 92 total_reward -480.64 step 132 total_reward -387.54 step 80 total_reward -506.51 step 58 total_reward -433.09 step 85 total_reward -522.64 step 129 total_reward -429.05 step 28 total_reward -263.44 step 61 total_reward -471.07 step 80 total_reward -367.10 step 113 total_reward -536.87 step 67 total_reward -341.55 step 39 total_reward -154.03 step 168 total_reward -296.46 step 145 total_reward -337.92 step 81 total_reward -613.13 step 43 total_reward -171.43 step 66 total_reward -647.79 step 116 total_reward -790.50 step 105 total_reward -635.29 step 80 total_reward -519.35 step 33 total_reward -209.88 step 16 total_reward -178.78 step 58 total_reward -199.46 step 201 total_reward -498.28 step 66 total_reward -45.04 step 61 total_reward -525.90 step 86 total_reward -531.71 step 76 total_reward -448.29 step 86 total_reward -154.15 step 187 total_reward -544.66 step 17 
total_reward -173.56 step 98 total_reward -454.23 step 102 total_reward -686.39 step 76 total_reward -208.85 step 62 total_reward -554.67 step 88 total_reward -779.38 step 134 total_reward -711.18 step 219 total_reward -583.79 step 1 total_reward -100.00 step 105 total_reward -318.51 step 9 total_reward -135.09 step 64 total_reward -567.04 step 64 total_reward -486.10 step 76 total_reward -196.19 step 88 total_reward -710.43 step 11 total_reward -119.64 step 97 total_reward -275.82 step 215 total_reward -876.63 step 162 total_reward -281.75 step 80 total_reward -533.05 step 79 total_reward -485.19 step 82 total_reward -465.88 step 95 total_reward -833.97 step 76 total_reward -67.47 step 63 total_reward -216.13 step 37 total_reward -180.52 step 88 total_reward -313.35 step 20 total_reward -128.51 step 8 total_reward -100.09 step 33 total_reward -236.54 step 93 total_reward -488.51 step 23 total_reward -111.01 step 59 total_reward -260.00 step 87 total_reward -388.56 step 96 total_reward -651.15 step 72 total_reward -516.79 step 119 total_reward -475.53 step 85 total_reward -527.86 step 57 total_reward -420.78 step 90 total_reward -597.83 step 105 total_reward -470.86 step 86 total_reward -501.07 step 60 total_reward -574.46 step 125 total_reward -101.61 step 94 total_reward -753.79 step 12 total_reward -144.25 step 73 total_reward -379.76 step 87 total_reward -612.34 step 139 total_reward -317.67 step 95 total_reward -480.56 step 37 total_reward -174.57 step 63 total_reward -482.26 step 135 total_reward -651.23 step 101 total_reward -399.69 step 87 total_reward -675.07 step 55 total_reward -174.39 step 178 total_reward -575.85 step 166 total_reward -272.48 step 77 total_reward -572.69 step 70 total_reward -618.93 step 81 total_reward -568.02 step 57 total_reward -408.23 step 18 total_reward -122.51 step 173 total_reward -658.07 step 43 total_reward -328.06 step 139 total_reward -490.71 step 56 total_reward -148.41 step 66 total_reward -393.79 step 66 total_reward 
-366.80 step 58 total_reward -273.69 step 87 total_reward -441.50 step 90 total_reward -327.48 step 125 total_reward -425.99 step 70 total_reward -658.30 step 84 total_reward -672.18 step 105 total_reward -249.16 step 75 total_reward -248.59 step 94 total_reward -438.39 step 71 total_reward -642.20 step 50 total_reward -384.87 step 83 total_reward -451.66 step 238 total_reward -400.98 step 103 total_reward -347.17 step 35 total_reward -304.84 step 109 total_reward -507.57 step 104 total_reward -511.93 step 88 total_reward -425.40 step 71 total_reward -577.73 step 118 total_reward -271.17 step 82 total_reward -518.53 step 51 total_reward -272.52 step 49 total_reward -301.04 step 111 total_reward -551.38 step 48 total_reward -260.74 step 76 total_reward -735.48 step 44 total_reward -130.83 step 94 total_reward -829.26 step 51 total_reward -443.12 step 167 total_reward -316.10 step 91 total_reward -215.95 step 15 total_reward -163.35 step 61 total_reward -301.38 step 124 total_reward -388.41 step 75 total_reward -371.90 step 94 total_reward -222.19 step 74 total_reward -615.67 step 90 total_reward -148.82 step 44 total_reward -271.67 step 106 total_reward -562.54 step 129 total_reward -555.98 step 104 total_reward -331.45 step 88 total_reward -410.47 step 92 total_reward -471.93 step 29 total_reward -151.58 step 68 total_reward -740.56 step 24 total_reward -216.82 step 17 total_reward -123.27 step 71 total_reward -612.44 step 38 total_reward -337.36 step 99 total_reward -689.64 step 78 total_reward -583.08 step 55 total_reward -308.10 step 76 total_reward -682.87 step 42 total_reward -312.14 step 81 total_reward -641.01 step 46 total_reward -469.44 step 184 total_reward -607.86 step 117 total_reward -264.69 step 65 total_reward -653.76 step 101 total_reward -663.98 step 90 total_reward -567.60 step 99 total_reward -521.95
# Saving rewards to a text file so I can recover these later if needed
# (one reward per line, written in episode order)
with open('cnn_rewards_backup.txt', 'w') as filehandle:
    filehandle.writelines('%s\n' % episode_reward for episode_reward in rewards)
Now I shall deploy the RL model for 200 episodes and save the reward.
from contextlib import redirect_stdout

# keras-rl's test() prints rewards to stdout, so capture 200 episodes into a file.
name = 'final_rl_reward.txt'
# The with-block closes the file and restores stdout even if test() raises,
# unlike the bare `sys.stdout = open(...)` reassignment it replaces.
with open(name, 'w+') as out, redirect_stdout(out):
    chosen_dqn.test(env, nb_episodes=200, visualize=False)
# Returning stdout to normal (a no-op after redirect_stdout; kept for safety)
sys.stdout = save
# Recovering RL reward from file
rl_reward = np.array(get_file_reward('final_rl_reward.txt'))
# Recovering CNN reward from file
# Recovering CNN reward from file: each line's last whitespace-separated
# token is one episode's total reward.
# with-block closes the handle; the original left the file open.
with open('cnn_rewards_backup.txt') as backup:
    data = [line.split(' ') for line in backup.read().splitlines()]
cnn_reward = np.array([float(tokens[-1]) for tokens in data])
# Line plot comparing per-episode rewards of the two models across 200 episodes.
plt.figure(figsize=(15, 6))
plt.title("Reward earned by models")
plt.xlabel('Episodes')
plt.xticks(rotation=90)
plt.ylabel('Reward')
plt.plot(rl_reward, 'green', label='RL model')
plt.plot(cnn_reward, 'red', label='CNN model')
plt.legend()
plt.show()
Here we can see the RL model reward plotted against the CNN model reward. The RL model consistently outperforms the CNN model.
# Boxplot of the reward distributions, CNN vs RL, drawn in the first
# quadrant of a 2x2 grid.
plt.figure(figsize=(12, 12))
plt.subplot(2, 2, 1)
seaborn.boxplot(data=[cnn_reward, rl_reward], orient='v', width=0.2, palette="Set3")
tick_labels = ["CNN", "RL"]
plt.xticks(np.arange(len(tick_labels)), tick_labels)
plt.xlabel("Model")
plt.ylabel("Reward")
plt.title("Reward Boxplot")
plt.show()
Here is another view of the reward distribution between the two models. RL's better performance is quite clear. But we can also see that the CNN is more unstable: it has a much larger range of reward, as seen in the distance between its minimum-value and maximum-value whiskers. It also has a significant outlier. The RL model has a smaller range between the whiskers, and no outliers.
# Summary statistics comparing the two models' 200-episode reward samples.
# The range is computed up front rather than via the original chained indexed
# assignment (stats['CNN']['Reward Range'] = ...), which raises
# SettingWithCopyWarning and silently fails under pandas copy-on-write.
stats = pd.DataFrame(index=['Average Reward', '% Positive', 'Max Reward', 'Min Reward', 'Reward Range'])
stats['CNN'] = [cnn_reward.mean(),
                (np.sum(cnn_reward >= 0) / cnn_reward.size) * 100,  # % of episodes with non-negative reward
                np.amax(cnn_reward),
                np.amin(cnn_reward),
                np.amax(cnn_reward) - np.amin(cnn_reward)]
stats['RL'] = [rl_reward.mean(),
               (np.sum(rl_reward >= 0) / rl_reward.size) * 100,
               np.amax(rl_reward),
               np.amin(rl_reward),
               np.amax(rl_reward) - np.amin(rl_reward)]
stats
| CNN | RL | |
|---|---|---|
| Average Reward | -425.676644 | 71.852345 |
| % Positive | 0.000000 | 57.500000 |
| Max Reward | -0.298372 | 338.623000 |
| Min Reward | -1115.378576 | -216.629000 |
| Reward Range | 1115.080204 | 555.252000 |